sqlglot.parser
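A minimal usage sketch (not part of the module source; it assumes the Tokenizer and Parser classes shown below, and the package-level helpers such as sqlglot.parse_one that wrap them):

    from sqlglot.tokens import Tokenizer
    from sqlglot.parser import Parser

    sql = "SELECT a FROM t WHERE b > 1"
    tokens = Tokenizer().tokenize(sql)
    expressions = Parser().parse(tokens, sql)  # one syntax tree per ;-separated statement

The full module source follows.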
from __future__ import annotations

import logging
import re
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder

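# Illustrative sketch (not part of the original source): builders like the ones above
# turn a parsed argument list into a typed expression node. For example, VAR_MAP's
# interleaved key/value arguments are split apart by build_var_map:
#
#   build_var_map([k1, v1, k2, v2])
#   # -> VarMap(keys=Array(k1, k2), values=Array(v1, v2))
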
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass

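# Illustrative note (not part of the original source): the _Parser metaclass above
# precomputes word tries for multi-word SHOW/SET statements. A hypothetical dialect
# defining SHOW_PARSERS = {"BINARY LOGS": ...} would get
# SHOW_TRIE = new_trie([["BINARY", "LOGS"]]), which the SHOW/SET parsing logic can
# then walk token by token with in_trie.
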
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

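    # Illustrative sketch (not part of the original source): FUNCTIONS maps an
    # upper-case function name to a builder, so a call like LOG10(x) resolves to the
    # "LOG10" entry above and yields exp.Log(this=Literal(10), expression=x).
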
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

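    # Illustrative note (not part of the original source): TABLE_ALIAS_TOKENS is the
    # identifier set minus join-related keywords, so in "FROM t LEFT JOIN u" the LEFT
    # keyword is never consumed as an alias of t, even though LEFT by itself can be
    # parsed as an identifier (it is in ID_VAR_TOKENS).
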
    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    CAST_COLUMN_OPERATORS = {
        TokenType.DOTCOLON,
        TokenType.DCOLON,
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If we don't have parenthesis after each keyword, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            #  - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            #  - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

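    # Illustrative sketch (not part of the original source): with the Hive-style
    # ordering PARTITION BY (BUCKET(16, col)), the literal 16 is parsed first, so the
    # swap in the method above canonicalizes the result to
    # PartitionedByBucket(this=col, expression=16), matching the Trino-style
    # BUCKET(col, 16) ordering.
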
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
    QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS)

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

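    # Illustrative note (not part of the original source): OPTIONS_TYPE dictionaries
    # map a leading keyword to the word sequences that may follow it, so
    # TRANSACTION_CHARACTERISTICS accepts phrases such as
    # "ISOLATION LEVEL READ COMMITTED" or "READ ONLY".
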
    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this
    # is true for Snowflake but not for BigQuery which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

Line {token.line}, Col: {token.col}.\n" 1715 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1716 description=message, 1717 line=token.line, 1718 col=token.col, 1719 start_context=start_context, 1720 highlight=highlight, 1721 end_context=end_context, 1722 ) 1723 1724 if self.error_level == ErrorLevel.IMMEDIATE: 1725 raise error 1726 1727 self.errors.append(error) 1728 1729 def expression( 1730 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1731 ) -> E: 1732 """ 1733 Creates a new, validated Expression. 1734 1735 Args: 1736 exp_class: The expression class to instantiate. 1737 comments: An optional list of comments to attach to the expression. 1738 kwargs: The arguments to set for the expression along with their respective values. 1739 1740 Returns: 1741 The target expression. 1742 """ 1743 instance = exp_class(**kwargs) 1744 instance.add_comments(comments) if comments else self._add_comments(instance) 1745 return self.validate_expression(instance) 1746 1747 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1748 if expression and self._prev_comments: 1749 expression.add_comments(self._prev_comments) 1750 self._prev_comments = None 1751 1752 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1753 """ 1754 Validates an Expression, making sure that all its mandatory arguments are set. 1755 1756 Args: 1757 expression: The expression to validate. 1758 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1759 1760 Returns: 1761 The validated expression. 1762 """ 1763 if self.error_level != ErrorLevel.IGNORE: 1764 for error_message in expression.error_messages(args): 1765 self.raise_error(error_message) 1766 1767 return expression 1768 1769 def _find_sql(self, start: Token, end: Token) -> str: 1770 return self.sql[start.start : end.end + 1] 1771 1772 def _is_connected(self) -> bool: 1773 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1774 1775 def _advance(self, times: int = 1) -> None: 1776 self._index += times 1777 self._curr = seq_get(self._tokens, self._index) 1778 self._next = seq_get(self._tokens, self._index + 1) 1779 1780 if self._index > 0: 1781 self._prev = self._tokens[self._index - 1] 1782 self._prev_comments = self._prev.comments 1783 else: 1784 self._prev = None 1785 self._prev_comments = None 1786 1787 def _retreat(self, index: int) -> None: 1788 if index != self._index: 1789 self._advance(index - self._index) 1790 1791 def _warn_unsupported(self) -> None: 1792 if len(self._tokens) <= 1: 1793 return 1794 1795 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1796 # interested in emitting a warning for the one being currently processed. 1797 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1798 1799 logger.warning( 1800 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1801 ) 1802 1803 def _parse_command(self) -> exp.Command: 1804 self._warn_unsupported() 1805 return self.expression( 1806 exp.Command, 1807 comments=self._prev_comments, 1808 this=self._prev.text.upper(), 1809 expression=self._parse_string(), 1810 ) 1811 1812 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1813 """ 1814 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1815 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1816 solve this by setting & resetting the parser state accordingly. 1817 """ 1818 index = self._index 1819 error_level = self.error_level 1820 1821 self.error_level = ErrorLevel.IMMEDIATE 1822 try: 1823 this = parse_method() 1824 except ParseError: 1825 this = None 1826 finally: 1827 if not this or retreat: 1828 self._retreat(index) 1829 self.error_level = error_level 1830 1831 return this 1832 1833 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1834 start = self._prev 1835 exists = self._parse_exists() if allow_exists else None 1836 1837 self._match(TokenType.ON) 1838 1839 materialized = self._match_text_seq("MATERIALIZED") 1840 kind = self._match_set(self.CREATABLES) and self._prev 1841 if not kind: 1842 return self._parse_as_command(start) 1843 1844 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1845 this = self._parse_user_defined_function(kind=kind.token_type) 1846 elif kind.token_type == TokenType.TABLE: 1847 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1848 elif kind.token_type == TokenType.COLUMN: 1849 this = self._parse_column() 1850 else: 1851 this = self._parse_id_var() 1852 1853 self._match(TokenType.IS) 1854 1855 return self.expression( 1856 exp.Comment, 1857 this=this, 1858 kind=kind.text, 1859 expression=self._parse_string(), 1860 exists=exists, 1861 materialized=materialized, 1862 ) 1863 1864 def _parse_to_table( 1865 self, 1866 ) -> exp.ToTableProperty: 1867 table = self._parse_table_parts(schema=True) 1868 return self.expression(exp.ToTableProperty, this=table) 1869 1870 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1871 def _parse_ttl(self) -> exp.Expression: 1872 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1873 this = self._parse_bitwise() 1874 1875 if self._match_text_seq("DELETE"): 1876 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1877 if self._match_text_seq("RECOMPRESS"): 1878 return self.expression( 1879 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1880 ) 1881 if self._match_text_seq("TO", "DISK"): 1882 return self.expression( 1883 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1884 ) 1885 if self._match_text_seq("TO", "VOLUME"): 1886 return self.expression( 1887 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1888 ) 1889 1890 return this 1891 1892 expressions = self._parse_csv(_parse_ttl_action) 1893 where = self._parse_where() 1894 group = self._parse_group() 1895 1896 aggregates = None 1897 if group and self._match(TokenType.SET): 1898 aggregates = self._parse_csv(self._parse_set_item) 1899 1900 return self.expression( 1901 exp.MergeTreeTTL, 1902 expressions=expressions, 1903 where=where, 1904 group=group, 1905 aggregates=aggregates, 1906 ) 1907 1908 def _parse_statement(self) -> t.Optional[exp.Expression]: 1909 if self._curr is None: 1910 return None 1911 1912 if self._match_set(self.STATEMENT_PARSERS): 1913 comments = self._prev_comments 1914 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1915 stmt.add_comments(comments, prepend=True) 1916 return stmt 1917 1918 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1919 return self._parse_command() 1920 1921 expression = self._parse_expression() 1922 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1923 return
self._parse_query_modifiers(expression) 1924 1925 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1926 start = self._prev 1927 temporary = self._match(TokenType.TEMPORARY) 1928 materialized = self._match_text_seq("MATERIALIZED") 1929 1930 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1931 if not kind: 1932 return self._parse_as_command(start) 1933 1934 concurrently = self._match_text_seq("CONCURRENTLY") 1935 if_exists = exists or self._parse_exists() 1936 1937 if kind == "COLUMN": 1938 this = self._parse_column() 1939 else: 1940 this = self._parse_table_parts( 1941 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1942 ) 1943 1944 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1945 1946 if self._match(TokenType.L_PAREN, advance=False): 1947 expressions = self._parse_wrapped_csv(self._parse_types) 1948 else: 1949 expressions = None 1950 1951 return self.expression( 1952 exp.Drop, 1953 exists=if_exists, 1954 this=this, 1955 expressions=expressions, 1956 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1957 temporary=temporary, 1958 materialized=materialized, 1959 cascade=self._match_text_seq("CASCADE"), 1960 constraints=self._match_text_seq("CONSTRAINTS"), 1961 purge=self._match_text_seq("PURGE"), 1962 cluster=cluster, 1963 concurrently=concurrently, 1964 ) 1965 1966 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1967 return ( 1968 self._match_text_seq("IF") 1969 and (not not_ or self._match(TokenType.NOT)) 1970 and self._match(TokenType.EXISTS) 1971 ) 1972 1973 def _parse_create(self) -> exp.Create | exp.Command: 1974 # Note: this can't be None because we've matched a statement parser 1975 start = self._prev 1976 1977 replace = ( 1978 start.token_type == TokenType.REPLACE 1979 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1980 or self._match_pair(TokenType.OR, TokenType.ALTER) 1981 ) 1982 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1983 1984 unique = self._match(TokenType.UNIQUE) 1985 1986 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1987 clustered = True 1988 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1989 "COLUMNSTORE" 1990 ): 1991 clustered = False 1992 else: 1993 clustered = None 1994 1995 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1996 self._advance() 1997 1998 properties = None 1999 create_token = self._match_set(self.CREATABLES) and self._prev 2000 2001 if not create_token: 2002 # exp.Properties.Location.POST_CREATE 2003 properties = self._parse_properties() 2004 create_token = self._match_set(self.CREATABLES) and self._prev 2005 2006 if not properties or not create_token: 2007 return self._parse_as_command(start) 2008 2009 concurrently = self._match_text_seq("CONCURRENTLY") 2010 exists = self._parse_exists(not_=True) 2011 this = None 2012 expression: t.Optional[exp.Expression] = None 2013 indexes = None 2014 no_schema_binding = None 2015 begin = None 2016 end = None 2017 clone = None 2018 2019 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2020 nonlocal properties 2021 if properties and temp_props: 2022 properties.expressions.extend(temp_props.expressions) 2023 elif temp_props: 2024 properties = temp_props 2025 2026 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2027 this = self._parse_user_defined_function(kind=create_token.token_type) 2028 2029 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2030 extend_props(self._parse_properties()) 2031 2032 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2033 extend_props(self._parse_properties()) 2034 2035 if not expression: 2036 if self._match(TokenType.COMMAND): 2037 expression = self._parse_as_command(self._prev) 2038 else: 2039 begin = self._match(TokenType.BEGIN) 2040 return_ = self._match_text_seq("RETURN") 2041 2042 if self._match(TokenType.STRING, advance=False): 2043 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2044 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2045 expression = self._parse_string() 2046 extend_props(self._parse_properties()) 2047 else: 2048 expression = self._parse_user_defined_function_expression() 2049 2050 end = self._match_text_seq("END") 2051 2052 if return_: 2053 expression = self.expression(exp.Return, this=expression) 2054 elif create_token.token_type == TokenType.INDEX: 2055 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2056 if not self._match(TokenType.ON): 2057 index = self._parse_id_var() 2058 anonymous = False 2059 else: 2060 index = None 2061 anonymous = True 2062 2063 this = self._parse_index(index=index, anonymous=anonymous) 2064 elif create_token.token_type in self.DB_CREATABLES: 2065 table_parts = self._parse_table_parts( 2066 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2067 ) 2068 2069 # exp.Properties.Location.POST_NAME 2070 self._match(TokenType.COMMA) 2071 extend_props(self._parse_properties(before=True)) 2072 2073 this = self._parse_schema(this=table_parts) 2074 2075 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2076 extend_props(self._parse_properties()) 2077 2078 has_alias = self._match(TokenType.ALIAS) 2079 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2080 # exp.Properties.Location.POST_ALIAS 2081 extend_props(self._parse_properties()) 2082 2083 if create_token.token_type == TokenType.SEQUENCE: 2084 expression = self._parse_types() 2085 props = self._parse_properties() 2086 if props: 2087 sequence_props = exp.SequenceProperties() 2088 options = [] 2089 for prop in props: 2090 if isinstance(prop, exp.SequenceProperties): 2091 for arg, value in prop.args.items(): 2092 if arg == "options": 2093 options.extend(value) 2094 else: 2095 sequence_props.set(arg, value) 2096 prop.pop() 2097 2098 if options: 2099 sequence_props.set("options", options) 2100 2101 props.append("expressions", sequence_props) 2102 extend_props(props) 2103 else: 2104 expression = self._parse_ddl_select() 2105 2106 # Some dialects also support using a table as an alias instead of a SELECT. 2107 # Here we fallback to this as an alternative. 
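# For example (illustrative): ClickHouse-style CREATE TABLE t2 AS t1 puts an existing table
# name after AS instead of a SELECT, so nothing is parsed as a DDL select and the
# table-parts fallback below applies.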
2108 if not expression and has_alias: 2109 expression = self._try_parse(self._parse_table_parts) 2110 2111 if create_token.token_type == TokenType.TABLE: 2112 # exp.Properties.Location.POST_EXPRESSION 2113 extend_props(self._parse_properties()) 2114 2115 indexes = [] 2116 while True: 2117 index = self._parse_index() 2118 2119 # exp.Properties.Location.POST_INDEX 2120 extend_props(self._parse_properties()) 2121 if not index: 2122 break 2123 else: 2124 self._match(TokenType.COMMA) 2125 indexes.append(index) 2126 elif create_token.token_type == TokenType.VIEW: 2127 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2128 no_schema_binding = True 2129 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2130 extend_props(self._parse_properties()) 2131 2132 shallow = self._match_text_seq("SHALLOW") 2133 2134 if self._match_texts(self.CLONE_KEYWORDS): 2135 copy = self._prev.text.lower() == "copy" 2136 clone = self.expression( 2137 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2138 ) 2139 2140 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2141 return self._parse_as_command(start) 2142 2143 create_kind_text = create_token.text.upper() 2144 return self.expression( 2145 exp.Create, 2146 this=this, 2147 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2148 replace=replace, 2149 refresh=refresh, 2150 unique=unique, 2151 expression=expression, 2152 exists=exists, 2153 properties=properties, 2154 indexes=indexes, 2155 no_schema_binding=no_schema_binding, 2156 begin=begin, 2157 end=end, 2158 clone=clone, 2159 concurrently=concurrently, 2160 clustered=clustered, 2161 ) 2162 2163 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2164 seq = exp.SequenceProperties() 2165 2166 options = [] 2167 index = self._index 2168 2169 while self._curr: 2170 self._match(TokenType.COMMA) 2171 if self._match_text_seq("INCREMENT"): 2172 self._match_text_seq("BY") 2173 self._match_text_seq("=") 2174 seq.set("increment", self._parse_term()) 2175 elif self._match_text_seq("MINVALUE"): 2176 seq.set("minvalue", self._parse_term()) 2177 elif self._match_text_seq("MAXVALUE"): 2178 seq.set("maxvalue", self._parse_term()) 2179 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2180 self._match_text_seq("=") 2181 seq.set("start", self._parse_term()) 2182 elif self._match_text_seq("CACHE"): 2183 # T-SQL allows empty CACHE which is initialized dynamically 2184 seq.set("cache", self._parse_number() or True) 2185 elif self._match_text_seq("OWNED", "BY"): 2186 # "OWNED BY NONE" is the default 2187 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2188 else: 2189 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2190 if opt: 2191 options.append(opt) 2192 else: 2193 break 2194 2195 seq.set("options", options if options else None) 2196 return None if self._index == index else seq 2197 2198 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2199 # only used for teradata currently 2200 self._match(TokenType.COMMA) 2201 2202 kwargs = { 2203 "no": self._match_text_seq("NO"), 2204 "dual": self._match_text_seq("DUAL"), 2205 "before": self._match_text_seq("BEFORE"), 2206 "default": self._match_text_seq("DEFAULT"), 2207 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2208 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2209 "after": self._match_text_seq("AFTER"), 2210 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2211 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2212 } 2213 2214 if self._match_texts(self.PROPERTY_PARSERS): 2215 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2216 try: 2217 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2218 except TypeError: 2219 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2220 2221 return None 2222 2223 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2224 return self._parse_wrapped_csv(self._parse_property) 2225 2226 def _parse_property(self) -> t.Optional[exp.Expression]: 2227 if self._match_texts(self.PROPERTY_PARSERS): 2228 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2229 2230 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2231 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2232 2233 if self._match_text_seq("COMPOUND", "SORTKEY"): 2234 return self._parse_sortkey(compound=True) 2235 2236 if self._match_text_seq("SQL", "SECURITY"): 2237 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2238 2239 index = self._index 2240 2241 seq_props = self._parse_sequence_properties() 2242 if seq_props: 2243 return seq_props 2244 2245 self._retreat(index) 2246 key = self._parse_column() 2247 2248 if not self._match(TokenType.EQ): 2249 self._retreat(index) 2250 return None 2251 2252 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2253 if isinstance(key, exp.Column): 2254 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2255 2256 value = self._parse_bitwise() or self._parse_var(any_token=True) 2257 2258 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2259 if isinstance(value, exp.Column): 2260 value = exp.var(value.name) 2261 2262 return self.expression(exp.Property, this=key, value=value) 2263 2264 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2265 if self._match_text_seq("BY"): 2266 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2267 2268 self._match(TokenType.ALIAS) 2269 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2270 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2271 2272 return self.expression( 2273 exp.FileFormatProperty, 2274 this=( 2275 self.expression( 2276 exp.InputOutputFormat, 2277 input_format=input_format, 2278 output_format=output_format, 2279 ) 2280 if input_format or output_format 2281 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2282 ), 2283 hive_format=True, 2284 ) 2285 2286 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2287 field = self._parse_field() 2288 if isinstance(field, exp.Identifier) and not field.quoted: 2289 field = exp.var(field) 2290 2291 return field 2292 2293 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2294 self._match(TokenType.EQ) 2295 self._match(TokenType.ALIAS) 2296 2297 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2298 2299 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2300 properties = [] 2301 while True: 2302 if before: 2303 prop = self._parse_property_before() 2304 else: 2305 prop = self._parse_property() 2306 if not prop: 2307 break 2308 for p in ensure_list(prop): 2309 properties.append(p) 
2310 2311 if properties: 2312 return self.expression(exp.Properties, expressions=properties) 2313 2314 return None 2315 2316 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2317 return self.expression( 2318 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2319 ) 2320 2321 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2322 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2323 security_specifier = self._prev.text.upper() 2324 return self.expression(exp.SecurityProperty, this=security_specifier) 2325 return None 2326 2327 def _parse_settings_property(self) -> exp.SettingsProperty: 2328 return self.expression( 2329 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2330 ) 2331 2332 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2333 if self._index >= 2: 2334 pre_volatile_token = self._tokens[self._index - 2] 2335 else: 2336 pre_volatile_token = None 2337 2338 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2339 return exp.VolatileProperty() 2340 2341 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2342 2343 def _parse_retention_period(self) -> exp.Var: 2344 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2345 number = self._parse_number() 2346 number_str = f"{number} " if number else "" 2347 unit = self._parse_var(any_token=True) 2348 return exp.var(f"{number_str}{unit}") 2349 2350 def _parse_system_versioning_property( 2351 self, with_: bool = False 2352 ) -> exp.WithSystemVersioningProperty: 2353 self._match(TokenType.EQ) 2354 prop = self.expression( 2355 exp.WithSystemVersioningProperty, 2356 **{ # type: ignore 2357 "on": True, 2358 "with": with_, 2359 }, 2360 ) 2361 2362 if self._match_text_seq("OFF"): 2363 prop.set("on", False) 2364 return prop 2365 2366 self._match(TokenType.ON) 2367 if self._match(TokenType.L_PAREN): 2368 while self._curr and not self._match(TokenType.R_PAREN): 2369 if self._match_text_seq("HISTORY_TABLE", "="): 2370 prop.set("this", self._parse_table_parts()) 2371 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2372 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2373 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2374 prop.set("retention_period", self._parse_retention_period()) 2375 2376 self._match(TokenType.COMMA) 2377 2378 return prop 2379 2380 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2381 self._match(TokenType.EQ) 2382 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2383 prop = self.expression(exp.DataDeletionProperty, on=on) 2384 2385 if self._match(TokenType.L_PAREN): 2386 while self._curr and not self._match(TokenType.R_PAREN): 2387 if self._match_text_seq("FILTER_COLUMN", "="): 2388 prop.set("filter_column", self._parse_column()) 2389 elif self._match_text_seq("RETENTION_PERIOD", "="): 2390 prop.set("retention_period", self._parse_retention_period()) 2391 2392 self._match(TokenType.COMMA) 2393 2394 return prop 2395 2396 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2397 kind = "HASH" 2398 expressions: t.Optional[t.List[exp.Expression]] = None 2399 if self._match_text_seq("BY", "HASH"): 2400 expressions = self._parse_wrapped_csv(self._parse_id_var) 2401 elif self._match_text_seq("BY", "RANDOM"): 2402 kind = "RANDOM" 2403 2404 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2405 
buckets: t.Optional[exp.Expression] = None 2406 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2407 buckets = self._parse_number() 2408 2409 return self.expression( 2410 exp.DistributedByProperty, 2411 expressions=expressions, 2412 kind=kind, 2413 buckets=buckets, 2414 order=self._parse_order(), 2415 ) 2416 2417 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2418 self._match_text_seq("KEY") 2419 expressions = self._parse_wrapped_id_vars() 2420 return self.expression(expr_type, expressions=expressions) 2421 2422 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2423 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2424 prop = self._parse_system_versioning_property(with_=True) 2425 self._match_r_paren() 2426 return prop 2427 2428 if self._match(TokenType.L_PAREN, advance=False): 2429 return self._parse_wrapped_properties() 2430 2431 if self._match_text_seq("JOURNAL"): 2432 return self._parse_withjournaltable() 2433 2434 if self._match_texts(self.VIEW_ATTRIBUTES): 2435 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2436 2437 if self._match_text_seq("DATA"): 2438 return self._parse_withdata(no=False) 2439 elif self._match_text_seq("NO", "DATA"): 2440 return self._parse_withdata(no=True) 2441 2442 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2443 return self._parse_serde_properties(with_=True) 2444 2445 if self._match(TokenType.SCHEMA): 2446 return self.expression( 2447 exp.WithSchemaBindingProperty, 2448 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2449 ) 2450 2451 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2452 return self.expression( 2453 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2454 ) 2455 2456 if not self._next: 2457 return None 2458 2459 return self._parse_withisolatedloading() 2460 2461 def _parse_procedure_option(self) -> exp.Expression | None: 2462 if self._match_text_seq("EXECUTE", "AS"): 2463 return self.expression( 2464 exp.ExecuteAsProperty, 2465 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2466 or self._parse_string(), 2467 ) 2468 2469 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2470 2471 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2472 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2473 self._match(TokenType.EQ) 2474 2475 user = self._parse_id_var() 2476 self._match(TokenType.PARAMETER) 2477 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2478 2479 if not user or not host: 2480 return None 2481 2482 return exp.DefinerProperty(this=f"{user}@{host}") 2483 2484 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2485 self._match(TokenType.TABLE) 2486 self._match(TokenType.EQ) 2487 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2488 2489 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2490 return self.expression(exp.LogProperty, no=no) 2491 2492 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2493 return self.expression(exp.JournalProperty, **kwargs) 2494 2495 def _parse_checksum(self) -> exp.ChecksumProperty: 2496 self._match(TokenType.EQ) 2497 2498 on = None 2499 if self._match(TokenType.ON): 2500 on = True 2501 elif self._match_text_seq("OFF"): 2502 on = False 2503 2504 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2505 2506 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2507 return self.expression( 2508 exp.Cluster, 2509 expressions=( 2510 self._parse_wrapped_csv(self._parse_ordered) 2511 if wrapped 2512 else self._parse_csv(self._parse_ordered) 2513 ), 2514 ) 2515 2516 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2517 self._match_text_seq("BY") 2518 2519 self._match_l_paren() 2520 expressions = self._parse_csv(self._parse_column) 2521 self._match_r_paren() 2522 2523 if self._match_text_seq("SORTED", "BY"): 2524 self._match_l_paren() 2525 sorted_by = self._parse_csv(self._parse_ordered) 2526 self._match_r_paren() 2527 else: 2528 sorted_by = None 2529 2530 self._match(TokenType.INTO) 2531 buckets = self._parse_number() 2532 self._match_text_seq("BUCKETS") 2533 2534 return self.expression( 2535 exp.ClusteredByProperty, 2536 expressions=expressions, 2537 sorted_by=sorted_by, 2538 buckets=buckets, 2539 ) 2540 2541 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2542 if not self._match_text_seq("GRANTS"): 2543 self._retreat(self._index - 1) 2544 return None 2545 2546 return self.expression(exp.CopyGrantsProperty) 2547 2548 def _parse_freespace(self) -> exp.FreespaceProperty: 2549 self._match(TokenType.EQ) 2550 return self.expression( 2551 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2552 ) 2553 2554 def _parse_mergeblockratio( 2555 self, no: bool = False, default: bool = False 2556 ) -> exp.MergeBlockRatioProperty: 2557 if self._match(TokenType.EQ): 2558 return self.expression( 2559 exp.MergeBlockRatioProperty, 2560 this=self._parse_number(), 2561 percent=self._match(TokenType.PERCENT), 2562 ) 2563 2564 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2565 2566 def _parse_datablocksize( 2567 self, 2568 default: t.Optional[bool] = None, 2569 minimum: t.Optional[bool] = None, 2570 maximum: t.Optional[bool] = None, 2571 ) -> exp.DataBlocksizeProperty: 2572 self._match(TokenType.EQ) 2573 size = self._parse_number() 2574 2575 units = None 2576 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2577 units = self._prev.text 2578 2579 return self.expression( 2580 exp.DataBlocksizeProperty, 2581 size=size, 2582 units=units, 2583 default=default, 2584 minimum=minimum, 2585 maximum=maximum, 2586 ) 2587 2588 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2589 self._match(TokenType.EQ) 2590 always = self._match_text_seq("ALWAYS") 2591 manual = self._match_text_seq("MANUAL") 2592 never = self._match_text_seq("NEVER") 2593 default = self._match_text_seq("DEFAULT") 2594 2595 autotemp = None 2596 if self._match_text_seq("AUTOTEMP"): 2597 autotemp = self._parse_schema() 2598 2599 return self.expression( 2600 exp.BlockCompressionProperty, 2601 always=always, 2602 manual=manual, 2603 never=never, 2604 default=default, 2605 autotemp=autotemp, 2606 ) 2607 2608 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2609 index = self._index 2610 no = self._match_text_seq("NO") 2611 concurrent = self._match_text_seq("CONCURRENT") 2612 2613 if not self._match_text_seq("ISOLATED", "LOADING"): 2614 self._retreat(index) 2615 return None 2616 2617 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2618 return self.expression( 2619 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2620 ) 2621 2622 def _parse_locking(self) -> exp.LockingProperty: 2623 if self._match(TokenType.TABLE): 2624 kind = "TABLE" 2625 elif 
self._match(TokenType.VIEW): 2626 kind = "VIEW" 2627 elif self._match(TokenType.ROW): 2628 kind = "ROW" 2629 elif self._match_text_seq("DATABASE"): 2630 kind = "DATABASE" 2631 else: 2632 kind = None 2633 2634 if kind in ("DATABASE", "TABLE", "VIEW"): 2635 this = self._parse_table_parts() 2636 else: 2637 this = None 2638 2639 if self._match(TokenType.FOR): 2640 for_or_in = "FOR" 2641 elif self._match(TokenType.IN): 2642 for_or_in = "IN" 2643 else: 2644 for_or_in = None 2645 2646 if self._match_text_seq("ACCESS"): 2647 lock_type = "ACCESS" 2648 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2649 lock_type = "EXCLUSIVE" 2650 elif self._match_text_seq("SHARE"): 2651 lock_type = "SHARE" 2652 elif self._match_text_seq("READ"): 2653 lock_type = "READ" 2654 elif self._match_text_seq("WRITE"): 2655 lock_type = "WRITE" 2656 elif self._match_text_seq("CHECKSUM"): 2657 lock_type = "CHECKSUM" 2658 else: 2659 lock_type = None 2660 2661 override = self._match_text_seq("OVERRIDE") 2662 2663 return self.expression( 2664 exp.LockingProperty, 2665 this=this, 2666 kind=kind, 2667 for_or_in=for_or_in, 2668 lock_type=lock_type, 2669 override=override, 2670 ) 2671 2672 def _parse_partition_by(self) -> t.List[exp.Expression]: 2673 if self._match(TokenType.PARTITION_BY): 2674 return self._parse_csv(self._parse_assignment) 2675 return [] 2676 2677 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2678 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2679 if self._match_text_seq("MINVALUE"): 2680 return exp.var("MINVALUE") 2681 if self._match_text_seq("MAXVALUE"): 2682 return exp.var("MAXVALUE") 2683 return self._parse_bitwise() 2684 2685 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2686 expression = None 2687 from_expressions = None 2688 to_expressions = None 2689 2690 if self._match(TokenType.IN): 2691 this = self._parse_wrapped_csv(self._parse_bitwise) 2692 elif self._match(TokenType.FROM): 2693 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2694 self._match_text_seq("TO") 2695 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2696 elif self._match_text_seq("WITH", "(", "MODULUS"): 2697 this = self._parse_number() 2698 self._match_text_seq(",", "REMAINDER") 2699 expression = self._parse_number() 2700 self._match_r_paren() 2701 else: 2702 self.raise_error("Failed to parse partition bound spec.") 2703 2704 return self.expression( 2705 exp.PartitionBoundSpec, 2706 this=this, 2707 expression=expression, 2708 from_expressions=from_expressions, 2709 to_expressions=to_expressions, 2710 ) 2711 2712 # https://www.postgresql.org/docs/current/sql-createtable.html 2713 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2714 if not self._match_text_seq("OF"): 2715 self._retreat(self._index - 1) 2716 return None 2717 2718 this = self._parse_table(schema=True) 2719 2720 if self._match(TokenType.DEFAULT): 2721 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2722 elif self._match_text_seq("FOR", "VALUES"): 2723 expression = self._parse_partition_bound_spec() 2724 else: 2725 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2726 2727 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2728 2729 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2730 self._match(TokenType.EQ) 2731 return self.expression( 2732 exp.PartitionedByProperty, 2733 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2734 ) 2735 2736 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2737 if self._match_text_seq("AND", "STATISTICS"): 2738 statistics = True 2739 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2740 statistics = False 2741 else: 2742 statistics = None 2743 2744 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2745 2746 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2747 if self._match_text_seq("SQL"): 2748 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2749 return None 2750 2751 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2752 if self._match_text_seq("SQL", "DATA"): 2753 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2754 return None 2755 2756 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2757 if self._match_text_seq("PRIMARY", "INDEX"): 2758 return exp.NoPrimaryIndexProperty() 2759 if self._match_text_seq("SQL"): 2760 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2761 return None 2762 2763 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2764 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2765 return exp.OnCommitProperty() 2766 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2767 return exp.OnCommitProperty(delete=True) 2768 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2769 2770 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2771 if self._match_text_seq("SQL", "DATA"): 2772 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2773 return None 2774 2775 def _parse_distkey(self) -> exp.DistKeyProperty: 2776 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2777 2778 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2779 table = self._parse_table(schema=True) 2780 2781 options = [] 2782 while self._match_texts(("INCLUDING", "EXCLUDING")): 2783 this = self._prev.text.upper() 2784 2785 id_var = self._parse_id_var() 2786 if not id_var: 2787 return None 2788 2789 options.append( 2790 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2791 ) 2792 2793 return self.expression(exp.LikeProperty, this=table, expressions=options) 2794 2795 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2796 return self.expression( 2797 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2798 ) 2799 2800 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2801 self._match(TokenType.EQ) 2802 return self.expression( 2803 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2804 ) 2805 2806 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2807 self._match_text_seq("WITH", "CONNECTION") 2808 return self.expression( 2809 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2810 ) 2811 2812 def _parse_returns(self) -> exp.ReturnsProperty: 2813 value: t.Optional[exp.Expression] 2814 null = None 2815 is_table = self._match(TokenType.TABLE) 2816 2817 if is_table: 2818 if self._match(TokenType.LT): 2819 value = self.expression( 2820 exp.Schema, 2821 this="TABLE", 2822 expressions=self._parse_csv(self._parse_struct_types), 2823 ) 2824 if not self._match(TokenType.GT): 2825 self.raise_error("Expecting >") 2826 else: 2827 value = self._parse_schema(exp.var("TABLE")) 2828 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
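# This branch covers the RETURNS NULL ON NULL INPUT function characteristic: only the
# null-call behavior is recorded here, and no return type is parsed for it.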
2829 null = True 2830 value = None 2831 else: 2832 value = self._parse_types() 2833 2834 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2835 2836 def _parse_describe(self) -> exp.Describe: 2837 kind = self._match_set(self.CREATABLES) and self._prev.text 2838 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2839 if self._match(TokenType.DOT): 2840 style = None 2841 self._retreat(self._index - 2) 2842 2843 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2844 2845 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2846 this = self._parse_statement() 2847 else: 2848 this = self._parse_table(schema=True) 2849 2850 properties = self._parse_properties() 2851 expressions = properties.expressions if properties else None 2852 partition = self._parse_partition() 2853 return self.expression( 2854 exp.Describe, 2855 this=this, 2856 style=style, 2857 kind=kind, 2858 expressions=expressions, 2859 partition=partition, 2860 format=format, 2861 ) 2862 2863 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2864 kind = self._prev.text.upper() 2865 expressions = [] 2866 2867 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2868 if self._match(TokenType.WHEN): 2869 expression = self._parse_disjunction() 2870 self._match(TokenType.THEN) 2871 else: 2872 expression = None 2873 2874 else_ = self._match(TokenType.ELSE) 2875 2876 if not self._match(TokenType.INTO): 2877 return None 2878 2879 return self.expression( 2880 exp.ConditionalInsert, 2881 this=self.expression( 2882 exp.Insert, 2883 this=self._parse_table(schema=True), 2884 expression=self._parse_derived_table_values(), 2885 ), 2886 expression=expression, 2887 else_=else_, 2888 ) 2889 2890 expression = parse_conditional_insert() 2891 while expression is not None: 2892 expressions.append(expression) 2893 expression = parse_conditional_insert() 2894 2895 return self.expression( 2896 exp.MultitableInserts, 2897 kind=kind, 2898 comments=comments, 2899 expressions=expressions, 2900 source=self._parse_table(), 2901 ) 2902 2903 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2904 comments = [] 2905 hint = self._parse_hint() 2906 overwrite = self._match(TokenType.OVERWRITE) 2907 ignore = self._match(TokenType.IGNORE) 2908 local = self._match_text_seq("LOCAL") 2909 alternative = None 2910 is_function = None 2911 2912 if self._match_text_seq("DIRECTORY"): 2913 this: t.Optional[exp.Expression] = self.expression( 2914 exp.Directory, 2915 this=self._parse_var_or_string(), 2916 local=local, 2917 row_format=self._parse_row_format(match_row=True), 2918 ) 2919 else: 2920 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2921 comments += ensure_list(self._prev_comments) 2922 return self._parse_multitable_inserts(comments) 2923 2924 if self._match(TokenType.OR): 2925 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2926 2927 self._match(TokenType.INTO) 2928 comments += ensure_list(self._prev_comments) 2929 self._match(TokenType.TABLE) 2930 is_function = self._match(TokenType.FUNCTION) 2931 2932 this = ( 2933 self._parse_table(schema=True, parse_partition=True) 2934 if not is_function 2935 else self._parse_function() 2936 ) 2937 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2938 this.set("alias", self._parse_table_alias()) 2939 2940 returning = self._parse_returning() 2941 2942 return self.expression( 2943 
exp.Insert, 2944 comments=comments, 2945 hint=hint, 2946 is_function=is_function, 2947 this=this, 2948 stored=self._match_text_seq("STORED") and self._parse_stored(), 2949 by_name=self._match_text_seq("BY", "NAME"), 2950 exists=self._parse_exists(), 2951 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2952 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2953 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2954 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2955 conflict=self._parse_on_conflict(), 2956 returning=returning or self._parse_returning(), 2957 overwrite=overwrite, 2958 alternative=alternative, 2959 ignore=ignore, 2960 source=self._match(TokenType.TABLE) and self._parse_table(), 2961 ) 2962 2963 def _parse_kill(self) -> exp.Kill: 2964 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2965 2966 return self.expression( 2967 exp.Kill, 2968 this=self._parse_primary(), 2969 kind=kind, 2970 ) 2971 2972 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2973 conflict = self._match_text_seq("ON", "CONFLICT") 2974 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2975 2976 if not conflict and not duplicate: 2977 return None 2978 2979 conflict_keys = None 2980 constraint = None 2981 2982 if conflict: 2983 if self._match_text_seq("ON", "CONSTRAINT"): 2984 constraint = self._parse_id_var() 2985 elif self._match(TokenType.L_PAREN): 2986 conflict_keys = self._parse_csv(self._parse_id_var) 2987 self._match_r_paren() 2988 2989 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2990 if self._prev.token_type == TokenType.UPDATE: 2991 self._match(TokenType.SET) 2992 expressions = self._parse_csv(self._parse_equality) 2993 else: 2994 expressions = None 2995 2996 return self.expression( 2997 exp.OnConflict, 2998 duplicate=duplicate, 2999 expressions=expressions, 3000 action=action, 3001 conflict_keys=conflict_keys, 3002 constraint=constraint, 3003 where=self._parse_where(), 3004 ) 3005 3006 def _parse_returning(self) -> t.Optional[exp.Returning]: 3007 if not self._match(TokenType.RETURNING): 3008 return None 3009 return self.expression( 3010 exp.Returning, 3011 expressions=self._parse_csv(self._parse_expression), 3012 into=self._match(TokenType.INTO) and self._parse_table_part(), 3013 ) 3014 3015 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3016 if not self._match(TokenType.FORMAT): 3017 return None 3018 return self._parse_row_format() 3019 3020 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3021 index = self._index 3022 with_ = with_ or self._match_text_seq("WITH") 3023 3024 if not self._match(TokenType.SERDE_PROPERTIES): 3025 self._retreat(index) 3026 return None 3027 return self.expression( 3028 exp.SerdeProperties, 3029 **{ # type: ignore 3030 "expressions": self._parse_wrapped_properties(), 3031 "with": with_, 3032 }, 3033 ) 3034 3035 def _parse_row_format( 3036 self, match_row: bool = False 3037 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3038 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3039 return None 3040 3041 if self._match_text_seq("SERDE"): 3042 this = self._parse_string() 3043 3044 serde_properties = self._parse_serde_properties() 3045 3046 return self.expression( 3047 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3048 ) 3049 3050 self._match_text_seq("DELIMITED") 3051 3052 kwargs = {} 3053 3054 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3055 kwargs["fields"] = self._parse_string() 3056 if self._match_text_seq("ESCAPED", "BY"): 3057 kwargs["escaped"] = self._parse_string() 3058 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3059 kwargs["collection_items"] = self._parse_string() 3060 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3061 kwargs["map_keys"] = self._parse_string() 3062 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3063 kwargs["lines"] = self._parse_string() 3064 if self._match_text_seq("NULL", "DEFINED", "AS"): 3065 kwargs["null"] = self._parse_string() 3066 3067 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3068 3069 def _parse_load(self) -> exp.LoadData | exp.Command: 3070 if self._match_text_seq("DATA"): 3071 local = self._match_text_seq("LOCAL") 3072 self._match_text_seq("INPATH") 3073 inpath = self._parse_string() 3074 overwrite = self._match(TokenType.OVERWRITE) 3075 self._match_pair(TokenType.INTO, TokenType.TABLE) 3076 3077 return self.expression( 3078 exp.LoadData, 3079 this=self._parse_table(schema=True), 3080 local=local, 3081 overwrite=overwrite, 3082 inpath=inpath, 3083 partition=self._parse_partition(), 3084 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3085 serde=self._match_text_seq("SERDE") and self._parse_string(), 3086 ) 3087 return self._parse_as_command(self._prev) 3088 3089 def _parse_delete(self) -> exp.Delete: 3090 # This handles MySQL's "Multiple-Table Syntax" 3091 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3092 tables = None 3093 if not self._match(TokenType.FROM, advance=False): 3094 tables = self._parse_csv(self._parse_table) or None 3095 3096 returning = self._parse_returning() 3097 3098 return self.expression( 3099 exp.Delete, 3100 tables=tables, 3101 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3102 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3103 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3104 where=self._parse_where(), 3105 returning=returning or self._parse_returning(), 3106 limit=self._parse_limit(), 3107 ) 3108 3109 def _parse_update(self) -> exp.Update: 3110 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3111 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3112 returning = self._parse_returning() 3113 return self.expression( 3114 exp.Update, 3115 **{ # type: ignore 3116 "this": this, 3117 "expressions": expressions, 3118 "from": self._parse_from(joins=True), 3119 "where": self._parse_where(), 3120 "returning": returning or self._parse_returning(), 3121 "order": self._parse_order(), 3122 "limit": self._parse_limit(), 3123 }, 3124 ) 3125 3126 def _parse_use(self) -> exp.Use: 3127 return self.expression( 3128 exp.Use, 3129 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3130 this=self._parse_table(schema=False), 3131 ) 3132 3133 def _parse_uncache(self) -> exp.Uncache: 3134 if not self._match(TokenType.TABLE): 3135 self.raise_error("Expecting TABLE after UNCACHE") 3136 3137 return self.expression( 3138 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3139 ) 3140 3141 def _parse_cache(self) -> exp.Cache: 3142 lazy = self._match_text_seq("LAZY") 3143 self._match(TokenType.TABLE) 3144 table = 
self._parse_table(schema=True) 3145 3146 options = [] 3147 if self._match_text_seq("OPTIONS"): 3148 self._match_l_paren() 3149 k = self._parse_string() 3150 self._match(TokenType.EQ) 3151 v = self._parse_string() 3152 options = [k, v] 3153 self._match_r_paren() 3154 3155 self._match(TokenType.ALIAS) 3156 return self.expression( 3157 exp.Cache, 3158 this=table, 3159 lazy=lazy, 3160 options=options, 3161 expression=self._parse_select(nested=True), 3162 ) 3163 3164 def _parse_partition(self) -> t.Optional[exp.Partition]: 3165 if not self._match_texts(self.PARTITION_KEYWORDS): 3166 return None 3167 3168 return self.expression( 3169 exp.Partition, 3170 subpartition=self._prev.text.upper() == "SUBPARTITION", 3171 expressions=self._parse_wrapped_csv(self._parse_assignment), 3172 ) 3173 3174 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3175 def _parse_value_expression() -> t.Optional[exp.Expression]: 3176 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3177 return exp.var(self._prev.text.upper()) 3178 return self._parse_expression() 3179 3180 if self._match(TokenType.L_PAREN): 3181 expressions = self._parse_csv(_parse_value_expression) 3182 self._match_r_paren() 3183 return self.expression(exp.Tuple, expressions=expressions) 3184 3185 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3186 expression = self._parse_expression() 3187 if expression: 3188 return self.expression(exp.Tuple, expressions=[expression]) 3189 return None 3190 3191 def _parse_projections(self) -> t.List[exp.Expression]: 3192 return self._parse_expressions() 3193 3194 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3195 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3196 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3197 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3198 ) 3199 elif self._match(TokenType.FROM): 3200 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3201 # Support parentheses for duckdb FROM-first syntax 3202 select = self._parse_select() 3203 if select: 3204 select.set("from", from_) 3205 this = select 3206 else: 3207 this = exp.select("*").from_(t.cast(exp.From, from_)) 3208 else: 3209 this = ( 3210 self._parse_table(consume_pipe=True) 3211 if table 3212 else self._parse_select(nested=True, parse_set_operation=False) 3213 ) 3214 3215 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3216 # in case a modifier (e.g. 
join) is following 3217 if table and isinstance(this, exp.Values) and this.alias: 3218 alias = this.args["alias"].pop() 3219 this = exp.Table(this=this, alias=alias) 3220 3221 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3222 3223 return this 3224 3225 def _parse_select( 3226 self, 3227 nested: bool = False, 3228 table: bool = False, 3229 parse_subquery_alias: bool = True, 3230 parse_set_operation: bool = True, 3231 consume_pipe: bool = True, 3232 ) -> t.Optional[exp.Expression]: 3233 query = self._parse_select_query( 3234 nested=nested, 3235 table=table, 3236 parse_subquery_alias=parse_subquery_alias, 3237 parse_set_operation=parse_set_operation, 3238 ) 3239 3240 if ( 3241 consume_pipe 3242 and self._match(TokenType.PIPE_GT, advance=False) 3243 and isinstance(query, exp.Query) 3244 ): 3245 query = self._parse_pipe_syntax_query(query) 3246 query = query.subquery(copy=False) if query and table else query 3247 3248 return query 3249 3250 def _parse_select_query( 3251 self, 3252 nested: bool = False, 3253 table: bool = False, 3254 parse_subquery_alias: bool = True, 3255 parse_set_operation: bool = True, 3256 ) -> t.Optional[exp.Expression]: 3257 cte = self._parse_with() 3258 3259 if cte: 3260 this = self._parse_statement() 3261 3262 if not this: 3263 self.raise_error("Failed to parse any statement following CTE") 3264 return cte 3265 3266 if "with" in this.arg_types: 3267 this.set("with", cte) 3268 else: 3269 self.raise_error(f"{this.key} does not support CTE") 3270 this = cte 3271 3272 return this 3273 3274 # duckdb supports leading with FROM x 3275 from_ = ( 3276 self._parse_from(consume_pipe=True) 3277 if self._match(TokenType.FROM, advance=False) 3278 else None 3279 ) 3280 3281 if self._match(TokenType.SELECT): 3282 comments = self._prev_comments 3283 3284 hint = self._parse_hint() 3285 3286 if self._next and not self._next.token_type == TokenType.DOT: 3287 all_ = self._match(TokenType.ALL) 3288 distinct = self._match_set(self.DISTINCT_TOKENS) 3289 else: 3290 all_, distinct = None, None 3291 3292 kind = ( 3293 self._match(TokenType.ALIAS) 3294 and self._match_texts(("STRUCT", "VALUE")) 3295 and self._prev.text.upper() 3296 ) 3297 3298 if distinct: 3299 distinct = self.expression( 3300 exp.Distinct, 3301 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3302 ) 3303 3304 if all_ and distinct: 3305 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3306 3307 operation_modifiers = [] 3308 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3309 operation_modifiers.append(exp.var(self._prev.text.upper())) 3310 3311 limit = self._parse_limit(top=True) 3312 projections = self._parse_projections() 3313 3314 this = self.expression( 3315 exp.Select, 3316 kind=kind, 3317 hint=hint, 3318 distinct=distinct, 3319 expressions=projections, 3320 limit=limit, 3321 operation_modifiers=operation_modifiers or None, 3322 ) 3323 this.comments = comments 3324 3325 into = self._parse_into() 3326 if into: 3327 this.set("into", into) 3328 3329 if not from_: 3330 from_ = self._parse_from() 3331 3332 if from_: 3333 this.set("from", from_) 3334 3335 this = self._parse_query_modifiers(this) 3336 elif (table or nested) and self._match(TokenType.L_PAREN): 3337 this = self._parse_wrapped_select(table=table) 3338 3339 # We return early here so that the UNION isn't attached to the subquery by the 3340 # following call to _parse_set_operations, but instead becomes the parent node 3341 self._match_r_paren() 3342 return self._parse_subquery(this, 
parse_alias=parse_subquery_alias) 3343 elif self._match(TokenType.VALUES, advance=False): 3344 this = self._parse_derived_table_values() 3345 elif from_: 3346 this = exp.select("*").from_(from_.this, copy=False) 3347 elif self._match(TokenType.SUMMARIZE): 3348 table = self._match(TokenType.TABLE) 3349 this = self._parse_select() or self._parse_string() or self._parse_table() 3350 return self.expression(exp.Summarize, this=this, table=table) 3351 elif self._match(TokenType.DESCRIBE): 3352 this = self._parse_describe() 3353 elif self._match_text_seq("STREAM"): 3354 this = self._parse_function() 3355 if this: 3356 this = self.expression(exp.Stream, this=this) 3357 else: 3358 self._retreat(self._index - 1) 3359 else: 3360 this = None 3361 3362 return self._parse_set_operations(this) if parse_set_operation else this 3363 3364 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3365 self._match_text_seq("SEARCH") 3366 3367 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3368 3369 if not kind: 3370 return None 3371 3372 self._match_text_seq("FIRST", "BY") 3373 3374 return self.expression( 3375 exp.RecursiveWithSearch, 3376 kind=kind, 3377 this=self._parse_id_var(), 3378 expression=self._match_text_seq("SET") and self._parse_id_var(), 3379 using=self._match_text_seq("USING") and self._parse_id_var(), 3380 ) 3381 3382 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3383 if not skip_with_token and not self._match(TokenType.WITH): 3384 return None 3385 3386 comments = self._prev_comments 3387 recursive = self._match(TokenType.RECURSIVE) 3388 3389 last_comments = None 3390 expressions = [] 3391 while True: 3392 cte = self._parse_cte() 3393 if isinstance(cte, exp.CTE): 3394 expressions.append(cte) 3395 if last_comments: 3396 cte.add_comments(last_comments) 3397 3398 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3399 break 3400 else: 3401 self._match(TokenType.WITH) 3402 3403 last_comments = self._prev_comments 3404 3405 return self.expression( 3406 exp.With, 3407 comments=comments, 3408 expressions=expressions, 3409 recursive=recursive, 3410 search=self._parse_recursive_with_search(), 3411 ) 3412 3413 def _parse_cte(self) -> t.Optional[exp.CTE]: 3414 index = self._index 3415 3416 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3417 if not alias or not alias.this: 3418 self.raise_error("Expected CTE to have alias") 3419 3420 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3421 self._retreat(index) 3422 return None 3423 3424 comments = self._prev_comments 3425 3426 if self._match_text_seq("NOT", "MATERIALIZED"): 3427 materialized = False 3428 elif self._match_text_seq("MATERIALIZED"): 3429 materialized = True 3430 else: 3431 materialized = None 3432 3433 cte = self.expression( 3434 exp.CTE, 3435 this=self._parse_wrapped(self._parse_statement), 3436 alias=alias, 3437 materialized=materialized, 3438 comments=comments, 3439 ) 3440 3441 values = cte.this 3442 if isinstance(values, exp.Values): 3443 if values.alias: 3444 cte.set("this", exp.select("*").from_(values)) 3445 else: 3446 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3447 3448 return cte 3449 3450 def _parse_table_alias( 3451 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3452 ) -> t.Optional[exp.TableAlias]: 3453 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3454 # so this section tries to parse the clause 
version and if it fails, it treats the token 3455 # as an identifier (alias) 3456 if self._can_parse_limit_or_offset(): 3457 return None 3458 3459 any_token = self._match(TokenType.ALIAS) 3460 alias = ( 3461 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3462 or self._parse_string_as_identifier() 3463 ) 3464 3465 index = self._index 3466 if self._match(TokenType.L_PAREN): 3467 columns = self._parse_csv(self._parse_function_parameter) 3468 self._match_r_paren() if columns else self._retreat(index) 3469 else: 3470 columns = None 3471 3472 if not alias and not columns: 3473 return None 3474 3475 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3476 3477 # We bubble up comments from the Identifier to the TableAlias 3478 if isinstance(alias, exp.Identifier): 3479 table_alias.add_comments(alias.pop_comments()) 3480 3481 return table_alias 3482 3483 def _parse_subquery( 3484 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3485 ) -> t.Optional[exp.Subquery]: 3486 if not this: 3487 return None 3488 3489 return self.expression( 3490 exp.Subquery, 3491 this=this, 3492 pivots=self._parse_pivots(), 3493 alias=self._parse_table_alias() if parse_alias else None, 3494 sample=self._parse_table_sample(), 3495 ) 3496 3497 def _implicit_unnests_to_explicit(self, this: E) -> E: 3498 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3499 3500 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3501 for i, join in enumerate(this.args.get("joins") or []): 3502 table = join.this 3503 normalized_table = table.copy() 3504 normalized_table.meta["maybe_column"] = True 3505 normalized_table = _norm(normalized_table, dialect=self.dialect) 3506 3507 if isinstance(table, exp.Table) and not join.args.get("on"): 3508 if normalized_table.parts[0].name in refs: 3509 table_as_column = table.to_column() 3510 unnest = exp.Unnest(expressions=[table_as_column]) 3511 3512 # Table.to_column creates a parent Alias node that we want to convert to 3513 # a TableAlias and attach to the Unnest, so it matches the parser's output 3514 if isinstance(table.args.get("alias"), exp.TableAlias): 3515 table_as_column.replace(table_as_column.this) 3516 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3517 3518 table.replace(unnest) 3519 3520 refs.add(normalized_table.alias_or_name) 3521 3522 return this 3523 3524 def _parse_query_modifiers( 3525 self, this: t.Optional[exp.Expression] 3526 ) -> t.Optional[exp.Expression]: 3527 if isinstance(this, self.MODIFIABLES): 3528 for join in self._parse_joins(): 3529 this.append("joins", join) 3530 for lateral in iter(self._parse_lateral, None): 3531 this.append("laterals", lateral) 3532 3533 while True: 3534 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3535 modifier_token = self._curr 3536 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3537 key, expression = parser(self) 3538 3539 if expression: 3540 if this.args.get(key): 3541 self.raise_error( 3542 f"Found multiple '{modifier_token.text.upper()}' clauses", 3543 token=modifier_token, 3544 ) 3545 3546 this.set(key, expression) 3547 if key == "limit": 3548 offset = expression.args.pop("offset", None) 3549 3550 if offset: 3551 offset = exp.Offset(expression=offset) 3552 this.set("offset", offset) 3553 3554 limit_by_expressions = expression.expressions 3555 expression.set("expressions", None) 3556 offset.set("expressions", limit_by_expressions) 3557 continue 
3558 break 3559 3560 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3561 this = self._implicit_unnests_to_explicit(this) 3562 3563 return this 3564 3565 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3566 start = self._curr 3567 while self._curr: 3568 self._advance() 3569 3570 end = self._tokens[self._index - 1] 3571 return exp.Hint(expressions=[self._find_sql(start, end)]) 3572 3573 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3574 return self._parse_function_call() 3575 3576 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3577 start_index = self._index 3578 should_fallback_to_string = False 3579 3580 hints = [] 3581 try: 3582 for hint in iter( 3583 lambda: self._parse_csv( 3584 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3585 ), 3586 [], 3587 ): 3588 hints.extend(hint) 3589 except ParseError: 3590 should_fallback_to_string = True 3591 3592 if should_fallback_to_string or self._curr: 3593 self._retreat(start_index) 3594 return self._parse_hint_fallback_to_string() 3595 3596 return self.expression(exp.Hint, expressions=hints) 3597 3598 def _parse_hint(self) -> t.Optional[exp.Hint]: 3599 if self._match(TokenType.HINT) and self._prev_comments: 3600 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3601 3602 return None 3603 3604 def _parse_into(self) -> t.Optional[exp.Into]: 3605 if not self._match(TokenType.INTO): 3606 return None 3607 3608 temp = self._match(TokenType.TEMPORARY) 3609 unlogged = self._match_text_seq("UNLOGGED") 3610 self._match(TokenType.TABLE) 3611 3612 return self.expression( 3613 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3614 ) 3615 3616 def _parse_from( 3617 self, 3618 joins: bool = False, 3619 skip_from_token: bool = False, 3620 consume_pipe: bool = False, 3621 ) -> t.Optional[exp.From]: 3622 if not skip_from_token and not self._match(TokenType.FROM): 3623 return None 3624 3625 return self.expression( 3626 exp.From, 3627 comments=self._prev_comments, 3628 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3629 ) 3630 3631 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3632 return self.expression( 3633 exp.MatchRecognizeMeasure, 3634 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3635 this=self._parse_expression(), 3636 ) 3637 3638 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3639 if not self._match(TokenType.MATCH_RECOGNIZE): 3640 return None 3641 3642 self._match_l_paren() 3643 3644 partition = self._parse_partition_by() 3645 order = self._parse_order() 3646 3647 measures = ( 3648 self._parse_csv(self._parse_match_recognize_measure) 3649 if self._match_text_seq("MEASURES") 3650 else None 3651 ) 3652 3653 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3654 rows = exp.var("ONE ROW PER MATCH") 3655 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3656 text = "ALL ROWS PER MATCH" 3657 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3658 text += " SHOW EMPTY MATCHES" 3659 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3660 text += " OMIT EMPTY MATCHES" 3661 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3662 text += " WITH UNMATCHED ROWS" 3663 rows = exp.var(text) 3664 else: 3665 rows = None 3666 3667 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3668 text = "AFTER MATCH SKIP" 3669 if self._match_text_seq("PAST", "LAST", "ROW"): 3670 text += " PAST LAST ROW" 3671 elif 
self._match_text_seq("TO", "NEXT", "ROW"): 3672 text += " TO NEXT ROW" 3673 elif self._match_text_seq("TO", "FIRST"): 3674 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3675 elif self._match_text_seq("TO", "LAST"): 3676 text += f" TO LAST {self._advance_any().text}" # type: ignore 3677 after = exp.var(text) 3678 else: 3679 after = None 3680 3681 if self._match_text_seq("PATTERN"): 3682 self._match_l_paren() 3683 3684 if not self._curr: 3685 self.raise_error("Expecting )", self._curr) 3686 3687 paren = 1 3688 start = self._curr 3689 3690 while self._curr and paren > 0: 3691 if self._curr.token_type == TokenType.L_PAREN: 3692 paren += 1 3693 if self._curr.token_type == TokenType.R_PAREN: 3694 paren -= 1 3695 3696 end = self._prev 3697 self._advance() 3698 3699 if paren > 0: 3700 self.raise_error("Expecting )", self._curr) 3701 3702 pattern = exp.var(self._find_sql(start, end)) 3703 else: 3704 pattern = None 3705 3706 define = ( 3707 self._parse_csv(self._parse_name_as_expression) 3708 if self._match_text_seq("DEFINE") 3709 else None 3710 ) 3711 3712 self._match_r_paren() 3713 3714 return self.expression( 3715 exp.MatchRecognize, 3716 partition_by=partition, 3717 order=order, 3718 measures=measures, 3719 rows=rows, 3720 after=after, 3721 pattern=pattern, 3722 define=define, 3723 alias=self._parse_table_alias(), 3724 ) 3725 3726 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3727 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3728 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3729 cross_apply = False 3730 3731 if cross_apply is not None: 3732 this = self._parse_select(table=True) 3733 view = None 3734 outer = None 3735 elif self._match(TokenType.LATERAL): 3736 this = self._parse_select(table=True) 3737 view = self._match(TokenType.VIEW) 3738 outer = self._match(TokenType.OUTER) 3739 else: 3740 return None 3741 3742 if not this: 3743 this = ( 3744 self._parse_unnest() 3745 or self._parse_function() 3746 or self._parse_id_var(any_token=False) 3747 ) 3748 3749 while self._match(TokenType.DOT): 3750 this = exp.Dot( 3751 this=this, 3752 expression=self._parse_function() or self._parse_id_var(any_token=False), 3753 ) 3754 3755 ordinality: t.Optional[bool] = None 3756 3757 if view: 3758 table = self._parse_id_var(any_token=False) 3759 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3760 table_alias: t.Optional[exp.TableAlias] = self.expression( 3761 exp.TableAlias, this=table, columns=columns 3762 ) 3763 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3764 # We move the alias from the lateral's child node to the lateral itself 3765 table_alias = this.args["alias"].pop() 3766 else: 3767 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3768 table_alias = self._parse_table_alias() 3769 3770 return self.expression( 3771 exp.Lateral, 3772 this=this, 3773 view=view, 3774 outer=outer, 3775 alias=table_alias, 3776 cross_apply=cross_apply, 3777 ordinality=ordinality, 3778 ) 3779 3780 def _parse_join_parts( 3781 self, 3782 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3783 return ( 3784 self._match_set(self.JOIN_METHODS) and self._prev, 3785 self._match_set(self.JOIN_SIDES) and self._prev, 3786 self._match_set(self.JOIN_KINDS) and self._prev, 3787 ) 3788 3789 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3790 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3791 this = self._parse_column() 3792 if 
isinstance(this, exp.Column): 3793 return this.this 3794 return this 3795 3796 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3797 3798 def _parse_join( 3799 self, skip_join_token: bool = False, parse_bracket: bool = False 3800 ) -> t.Optional[exp.Join]: 3801 if self._match(TokenType.COMMA): 3802 table = self._try_parse(self._parse_table) 3803 cross_join = self.expression(exp.Join, this=table) if table else None 3804 3805 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3806 cross_join.set("kind", "CROSS") 3807 3808 return cross_join 3809 3810 index = self._index 3811 method, side, kind = self._parse_join_parts() 3812 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3813 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3814 join_comments = self._prev_comments 3815 3816 if not skip_join_token and not join: 3817 self._retreat(index) 3818 kind = None 3819 method = None 3820 side = None 3821 3822 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3823 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3824 3825 if not skip_join_token and not join and not outer_apply and not cross_apply: 3826 return None 3827 3828 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3829 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3830 kwargs["expressions"] = self._parse_csv( 3831 lambda: self._parse_table(parse_bracket=parse_bracket) 3832 ) 3833 3834 if method: 3835 kwargs["method"] = method.text 3836 if side: 3837 kwargs["side"] = side.text 3838 if kind: 3839 kwargs["kind"] = kind.text 3840 if hint: 3841 kwargs["hint"] = hint 3842 3843 if self._match(TokenType.MATCH_CONDITION): 3844 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3845 3846 if self._match(TokenType.ON): 3847 kwargs["on"] = self._parse_assignment() 3848 elif self._match(TokenType.USING): 3849 kwargs["using"] = self._parse_using_identifiers() 3850 elif ( 3851 not method 3852 and not (outer_apply or cross_apply) 3853 and not isinstance(kwargs["this"], exp.Unnest) 3854 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3855 ): 3856 index = self._index 3857 joins: t.Optional[list] = list(self._parse_joins()) 3858 3859 if joins and self._match(TokenType.ON): 3860 kwargs["on"] = self._parse_assignment() 3861 elif joins and self._match(TokenType.USING): 3862 kwargs["using"] = self._parse_using_identifiers() 3863 else: 3864 joins = None 3865 self._retreat(index) 3866 3867 kwargs["this"].set("joins", joins if joins else None) 3868 3869 kwargs["pivots"] = self._parse_pivots() 3870 3871 comments = [c for token in (method, side, kind) if token for c in token.comments] 3872 comments = (join_comments or []) + comments 3873 return self.expression(exp.Join, comments=comments, **kwargs) 3874 3875 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3876 this = self._parse_assignment() 3877 3878 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3879 return this 3880 3881 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3882 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3883 3884 return this 3885 3886 def _parse_index_params(self) -> exp.IndexParameters: 3887 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3888 3889 if self._match(TokenType.L_PAREN, advance=False): 3890 columns = 
self._parse_wrapped_csv(self._parse_with_operator) 3891 else: 3892 columns = None 3893 3894 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3895 partition_by = self._parse_partition_by() 3896 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3897 tablespace = ( 3898 self._parse_var(any_token=True) 3899 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3900 else None 3901 ) 3902 where = self._parse_where() 3903 3904 on = self._parse_field() if self._match(TokenType.ON) else None 3905 3906 return self.expression( 3907 exp.IndexParameters, 3908 using=using, 3909 columns=columns, 3910 include=include, 3911 partition_by=partition_by, 3912 where=where, 3913 with_storage=with_storage, 3914 tablespace=tablespace, 3915 on=on, 3916 ) 3917 3918 def _parse_index( 3919 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3920 ) -> t.Optional[exp.Index]: 3921 if index or anonymous: 3922 unique = None 3923 primary = None 3924 amp = None 3925 3926 self._match(TokenType.ON) 3927 self._match(TokenType.TABLE) # hive 3928 table = self._parse_table_parts(schema=True) 3929 else: 3930 unique = self._match(TokenType.UNIQUE) 3931 primary = self._match_text_seq("PRIMARY") 3932 amp = self._match_text_seq("AMP") 3933 3934 if not self._match(TokenType.INDEX): 3935 return None 3936 3937 index = self._parse_id_var() 3938 table = None 3939 3940 params = self._parse_index_params() 3941 3942 return self.expression( 3943 exp.Index, 3944 this=index, 3945 table=table, 3946 unique=unique, 3947 primary=primary, 3948 amp=amp, 3949 params=params, 3950 ) 3951 3952 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3953 hints: t.List[exp.Expression] = [] 3954 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3955 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3956 hints.append( 3957 self.expression( 3958 exp.WithTableHint, 3959 expressions=self._parse_csv( 3960 lambda: self._parse_function() or self._parse_var(any_token=True) 3961 ), 3962 ) 3963 ) 3964 self._match_r_paren() 3965 else: 3966 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3967 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3968 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3969 3970 self._match_set((TokenType.INDEX, TokenType.KEY)) 3971 if self._match(TokenType.FOR): 3972 hint.set("target", self._advance_any() and self._prev.text.upper()) 3973 3974 hint.set("expressions", self._parse_wrapped_id_vars()) 3975 hints.append(hint) 3976 3977 return hints or None 3978 3979 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3980 return ( 3981 (not schema and self._parse_function(optional_parens=False)) 3982 or self._parse_id_var(any_token=False) 3983 or self._parse_string_as_identifier() 3984 or self._parse_placeholder() 3985 ) 3986 3987 def _parse_table_parts( 3988 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3989 ) -> exp.Table: 3990 catalog = None 3991 db = None 3992 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3993 3994 while self._match(TokenType.DOT): 3995 if catalog: 3996 # This allows nesting the table in arbitrarily many dot expressions if needed 3997 table = self.expression( 3998 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3999 ) 4000 else: 4001 catalog = db 4002 db = table 4003 # "" used for tsql FROM a..b case 4004 table = 
self._parse_table_part(schema=schema) or "" 4005 4006 if ( 4007 wildcard 4008 and self._is_connected() 4009 and (isinstance(table, exp.Identifier) or not table) 4010 and self._match(TokenType.STAR) 4011 ): 4012 if isinstance(table, exp.Identifier): 4013 table.args["this"] += "*" 4014 else: 4015 table = exp.Identifier(this="*") 4016 4017 # We bubble up comments from the Identifier to the Table 4018 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4019 4020 if is_db_reference: 4021 catalog = db 4022 db = table 4023 table = None 4024 4025 if not table and not is_db_reference: 4026 self.raise_error(f"Expected table name but got {self._curr}") 4027 if not db and is_db_reference: 4028 self.raise_error(f"Expected database name but got {self._curr}") 4029 4030 table = self.expression( 4031 exp.Table, 4032 comments=comments, 4033 this=table, 4034 db=db, 4035 catalog=catalog, 4036 ) 4037 4038 changes = self._parse_changes() 4039 if changes: 4040 table.set("changes", changes) 4041 4042 at_before = self._parse_historical_data() 4043 if at_before: 4044 table.set("when", at_before) 4045 4046 pivots = self._parse_pivots() 4047 if pivots: 4048 table.set("pivots", pivots) 4049 4050 return table 4051 4052 def _parse_table( 4053 self, 4054 schema: bool = False, 4055 joins: bool = False, 4056 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4057 parse_bracket: bool = False, 4058 is_db_reference: bool = False, 4059 parse_partition: bool = False, 4060 consume_pipe: bool = False, 4061 ) -> t.Optional[exp.Expression]: 4062 lateral = self._parse_lateral() 4063 if lateral: 4064 return lateral 4065 4066 unnest = self._parse_unnest() 4067 if unnest: 4068 return unnest 4069 4070 values = self._parse_derived_table_values() 4071 if values: 4072 return values 4073 4074 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4075 if subquery: 4076 if not subquery.args.get("pivots"): 4077 subquery.set("pivots", self._parse_pivots()) 4078 return subquery 4079 4080 bracket = parse_bracket and self._parse_bracket(None) 4081 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4082 4083 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4084 self._parse_table 4085 ) 4086 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4087 4088 only = self._match(TokenType.ONLY) 4089 4090 this = t.cast( 4091 exp.Expression, 4092 bracket 4093 or rows_from 4094 or self._parse_bracket( 4095 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4096 ), 4097 ) 4098 4099 if only: 4100 this.set("only", only) 4101 4102 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4103 self._match_text_seq("*") 4104 4105 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4106 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4107 this.set("partition", self._parse_partition()) 4108 4109 if schema: 4110 return self._parse_schema(this=this) 4111 4112 version = self._parse_version() 4113 4114 if version: 4115 this.set("version", version) 4116 4117 if self.dialect.ALIAS_POST_TABLESAMPLE: 4118 this.set("sample", self._parse_table_sample()) 4119 4120 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4121 if alias: 4122 this.set("alias", alias) 4123 4124 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4125 return self.expression( 4126 exp.AtIndex, this=this.to_column(copy=False), 
expression=self._parse_id_var() 4127 ) 4128 4129 this.set("hints", self._parse_table_hints()) 4130 4131 if not this.args.get("pivots"): 4132 this.set("pivots", self._parse_pivots()) 4133 4134 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4135 this.set("sample", self._parse_table_sample()) 4136 4137 if joins: 4138 for join in self._parse_joins(): 4139 this.append("joins", join) 4140 4141 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4142 this.set("ordinality", True) 4143 this.set("alias", self._parse_table_alias()) 4144 4145 return this 4146 4147 def _parse_version(self) -> t.Optional[exp.Version]: 4148 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4149 this = "TIMESTAMP" 4150 elif self._match(TokenType.VERSION_SNAPSHOT): 4151 this = "VERSION" 4152 else: 4153 return None 4154 4155 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4156 kind = self._prev.text.upper() 4157 start = self._parse_bitwise() 4158 self._match_texts(("TO", "AND")) 4159 end = self._parse_bitwise() 4160 expression: t.Optional[exp.Expression] = self.expression( 4161 exp.Tuple, expressions=[start, end] 4162 ) 4163 elif self._match_text_seq("CONTAINED", "IN"): 4164 kind = "CONTAINED IN" 4165 expression = self.expression( 4166 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4167 ) 4168 elif self._match(TokenType.ALL): 4169 kind = "ALL" 4170 expression = None 4171 else: 4172 self._match_text_seq("AS", "OF") 4173 kind = "AS OF" 4174 expression = self._parse_type() 4175 4176 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4177 4178 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4179 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4180 index = self._index 4181 historical_data = None 4182 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4183 this = self._prev.text.upper() 4184 kind = ( 4185 self._match(TokenType.L_PAREN) 4186 and self._match_texts(self.HISTORICAL_DATA_KIND) 4187 and self._prev.text.upper() 4188 ) 4189 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4190 4191 if expression: 4192 self._match_r_paren() 4193 historical_data = self.expression( 4194 exp.HistoricalData, this=this, kind=kind, expression=expression 4195 ) 4196 else: 4197 self._retreat(index) 4198 4199 return historical_data 4200 4201 def _parse_changes(self) -> t.Optional[exp.Changes]: 4202 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4203 return None 4204 4205 information = self._parse_var(any_token=True) 4206 self._match_r_paren() 4207 4208 return self.expression( 4209 exp.Changes, 4210 information=information, 4211 at_before=self._parse_historical_data(), 4212 end=self._parse_historical_data(), 4213 ) 4214 4215 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4216 if not self._match(TokenType.UNNEST): 4217 return None 4218 4219 expressions = self._parse_wrapped_csv(self._parse_equality) 4220 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4221 4222 alias = self._parse_table_alias() if with_alias else None 4223 4224 if alias: 4225 if self.dialect.UNNEST_COLUMN_ONLY: 4226 if alias.args.get("columns"): 4227 self.raise_error("Unexpected extra column alias in unnest.") 4228 4229 alias.set("columns", [alias.this]) 4230 alias.set("this", None) 4231 4232 columns = alias.args.get("columns") or [] 4233 if offset and len(expressions) < len(columns): 4234 offset = columns.pop() 4235 4236 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4237 
self._match(TokenType.ALIAS) 4238 offset = self._parse_id_var( 4239 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4240 ) or exp.to_identifier("offset") 4241 4242 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4243 4244 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4245 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4246 if not is_derived and not ( 4247 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4248 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4249 ): 4250 return None 4251 4252 expressions = self._parse_csv(self._parse_value) 4253 alias = self._parse_table_alias() 4254 4255 if is_derived: 4256 self._match_r_paren() 4257 4258 return self.expression( 4259 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4260 ) 4261 4262 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4263 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4264 as_modifier and self._match_text_seq("USING", "SAMPLE") 4265 ): 4266 return None 4267 4268 bucket_numerator = None 4269 bucket_denominator = None 4270 bucket_field = None 4271 percent = None 4272 size = None 4273 seed = None 4274 4275 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4276 matched_l_paren = self._match(TokenType.L_PAREN) 4277 4278 if self.TABLESAMPLE_CSV: 4279 num = None 4280 expressions = self._parse_csv(self._parse_primary) 4281 else: 4282 expressions = None 4283 num = ( 4284 self._parse_factor() 4285 if self._match(TokenType.NUMBER, advance=False) 4286 else self._parse_primary() or self._parse_placeholder() 4287 ) 4288 4289 if self._match_text_seq("BUCKET"): 4290 bucket_numerator = self._parse_number() 4291 self._match_text_seq("OUT", "OF") 4292 bucket_denominator = bucket_denominator = self._parse_number() 4293 self._match(TokenType.ON) 4294 bucket_field = self._parse_field() 4295 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4296 percent = num 4297 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4298 size = num 4299 else: 4300 percent = num 4301 4302 if matched_l_paren: 4303 self._match_r_paren() 4304 4305 if self._match(TokenType.L_PAREN): 4306 method = self._parse_var(upper=True) 4307 seed = self._match(TokenType.COMMA) and self._parse_number() 4308 self._match_r_paren() 4309 elif self._match_texts(("SEED", "REPEATABLE")): 4310 seed = self._parse_wrapped(self._parse_number) 4311 4312 if not method and self.DEFAULT_SAMPLING_METHOD: 4313 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4314 4315 return self.expression( 4316 exp.TableSample, 4317 expressions=expressions, 4318 method=method, 4319 bucket_numerator=bucket_numerator, 4320 bucket_denominator=bucket_denominator, 4321 bucket_field=bucket_field, 4322 percent=percent, 4323 size=size, 4324 seed=seed, 4325 ) 4326 4327 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4328 return list(iter(self._parse_pivot, None)) or None 4329 4330 def _parse_joins(self) -> t.Iterator[exp.Join]: 4331 return iter(self._parse_join, None) 4332 4333 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4334 if not self._match(TokenType.INTO): 4335 return None 4336 4337 return self.expression( 4338 exp.UnpivotColumns, 4339 this=self._match_text_seq("NAME") and self._parse_column(), 4340 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4341 ) 4342 4343 # https://duckdb.org/docs/sql/statements/pivot 4344 
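# Editor's note (illustrative addition, not part of the original source): the method below
# handles DuckDB's simplified syntax from the page linked above, for example
#
#   PIVOT cities ON year USING sum(population) GROUP BY country
#   UNPIVOT monthly_sales ON jan, feb, mar INTO NAME month VALUE sales
#
# The ON entries are parsed by the nested _parse_on helper, USING aggregations by
# _parse_alias(self._parse_function()), INTO NAME ... VALUE ... by _parse_unpivot_columns,
# and an optional GROUP BY by _parse_group.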
def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4345 def _parse_on() -> t.Optional[exp.Expression]: 4346 this = self._parse_bitwise() 4347 4348 if self._match(TokenType.IN): 4349 # PIVOT ... ON col IN (row_val1, row_val2) 4350 return self._parse_in(this) 4351 if self._match(TokenType.ALIAS, advance=False): 4352 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4353 return self._parse_alias(this) 4354 4355 return this 4356 4357 this = self._parse_table() 4358 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4359 into = self._parse_unpivot_columns() 4360 using = self._match(TokenType.USING) and self._parse_csv( 4361 lambda: self._parse_alias(self._parse_function()) 4362 ) 4363 group = self._parse_group() 4364 4365 return self.expression( 4366 exp.Pivot, 4367 this=this, 4368 expressions=expressions, 4369 using=using, 4370 group=group, 4371 unpivot=is_unpivot, 4372 into=into, 4373 ) 4374 4375 def _parse_pivot_in(self) -> exp.In: 4376 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4377 this = self._parse_select_or_expression() 4378 4379 self._match(TokenType.ALIAS) 4380 alias = self._parse_bitwise() 4381 if alias: 4382 if isinstance(alias, exp.Column) and not alias.db: 4383 alias = alias.this 4384 return self.expression(exp.PivotAlias, this=this, alias=alias) 4385 4386 return this 4387 4388 value = self._parse_column() 4389 4390 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4391 self.raise_error("Expecting IN (") 4392 4393 if self._match(TokenType.ANY): 4394 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4395 else: 4396 exprs = self._parse_csv(_parse_aliased_expression) 4397 4398 self._match_r_paren() 4399 return self.expression(exp.In, this=value, expressions=exprs) 4400 4401 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4402 func = self._parse_function() 4403 if not func: 4404 self.raise_error("Expecting an aggregation function in PIVOT") 4405 4406 return self._parse_alias(func) 4407 4408 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4409 index = self._index 4410 include_nulls = None 4411 4412 if self._match(TokenType.PIVOT): 4413 unpivot = False 4414 elif self._match(TokenType.UNPIVOT): 4415 unpivot = True 4416 4417 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4418 if self._match_text_seq("INCLUDE", "NULLS"): 4419 include_nulls = True 4420 elif self._match_text_seq("EXCLUDE", "NULLS"): 4421 include_nulls = False 4422 else: 4423 return None 4424 4425 expressions = [] 4426 4427 if not self._match(TokenType.L_PAREN): 4428 self._retreat(index) 4429 return None 4430 4431 if unpivot: 4432 expressions = self._parse_csv(self._parse_column) 4433 else: 4434 expressions = self._parse_csv(self._parse_pivot_aggregation) 4435 4436 if not expressions: 4437 self.raise_error("Failed to parse PIVOT's aggregation list") 4438 4439 if not self._match(TokenType.FOR): 4440 self.raise_error("Expecting FOR") 4441 4442 fields = [] 4443 while True: 4444 field = self._try_parse(self._parse_pivot_in) 4445 if not field: 4446 break 4447 fields.append(field) 4448 4449 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4450 self._parse_bitwise 4451 ) 4452 4453 group = self._parse_group() 4454 4455 self._match_r_paren() 4456 4457 pivot = self.expression( 4458 exp.Pivot, 4459 expressions=expressions, 4460 fields=fields, 4461 unpivot=unpivot, 4462 include_nulls=include_nulls, 4463 
default_on_null=default_on_null, 4464 group=group, 4465 ) 4466 4467 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4468 pivot.set("alias", self._parse_table_alias()) 4469 4470 if not unpivot: 4471 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4472 4473 columns: t.List[exp.Expression] = [] 4474 all_fields = [] 4475 for pivot_field in pivot.fields: 4476 pivot_field_expressions = pivot_field.expressions 4477 4478 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4479 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4480 continue 4481 4482 all_fields.append( 4483 [ 4484 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4485 for fld in pivot_field_expressions 4486 ] 4487 ) 4488 4489 if all_fields: 4490 if names: 4491 all_fields.append(names) 4492 4493 # Generate all possible combinations of the pivot columns 4494 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4495 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4496 for fld_parts_tuple in itertools.product(*all_fields): 4497 fld_parts = list(fld_parts_tuple) 4498 4499 if names and self.PREFIXED_PIVOT_COLUMNS: 4500 # Move the "name" to the front of the list 4501 fld_parts.insert(0, fld_parts.pop(-1)) 4502 4503 columns.append(exp.to_identifier("_".join(fld_parts))) 4504 4505 pivot.set("columns", columns) 4506 4507 return pivot 4508 4509 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4510 return [agg.alias for agg in aggregations if agg.alias] 4511 4512 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4513 if not skip_where_token and not self._match(TokenType.PREWHERE): 4514 return None 4515 4516 return self.expression( 4517 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4518 ) 4519 4520 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4521 if not skip_where_token and not self._match(TokenType.WHERE): 4522 return None 4523 4524 return self.expression( 4525 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4526 ) 4527 4528 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4529 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4530 return None 4531 comments = self._prev_comments 4532 4533 elements: t.Dict[str, t.Any] = defaultdict(list) 4534 4535 if self._match(TokenType.ALL): 4536 elements["all"] = True 4537 elif self._match(TokenType.DISTINCT): 4538 elements["all"] = False 4539 4540 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4541 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4542 4543 while True: 4544 index = self._index 4545 4546 elements["expressions"].extend( 4547 self._parse_csv( 4548 lambda: None 4549 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4550 else self._parse_assignment() 4551 ) 4552 ) 4553 4554 before_with_index = self._index 4555 with_prefix = self._match(TokenType.WITH) 4556 4557 if self._match(TokenType.ROLLUP): 4558 elements["rollup"].append( 4559 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4560 ) 4561 elif self._match(TokenType.CUBE): 4562 elements["cube"].append( 4563 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4564 ) 4565 elif self._match(TokenType.GROUPING_SETS): 4566 elements["grouping_sets"].append( 4567 
self.expression( 4568 exp.GroupingSets, 4569 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4570 ) 4571 ) 4572 elif self._match_text_seq("TOTALS"): 4573 elements["totals"] = True # type: ignore 4574 4575 if before_with_index <= self._index <= before_with_index + 1: 4576 self._retreat(before_with_index) 4577 break 4578 4579 if index == self._index: 4580 break 4581 4582 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4583 4584 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4585 return self.expression( 4586 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4587 ) 4588 4589 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4590 if self._match(TokenType.L_PAREN): 4591 grouping_set = self._parse_csv(self._parse_column) 4592 self._match_r_paren() 4593 return self.expression(exp.Tuple, expressions=grouping_set) 4594 4595 return self._parse_column() 4596 4597 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4598 if not skip_having_token and not self._match(TokenType.HAVING): 4599 return None 4600 return self.expression( 4601 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4602 ) 4603 4604 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4605 if not self._match(TokenType.QUALIFY): 4606 return None 4607 return self.expression(exp.Qualify, this=self._parse_assignment()) 4608 4609 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4610 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4611 exp.Prior, this=self._parse_bitwise() 4612 ) 4613 connect = self._parse_assignment() 4614 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4615 return connect 4616 4617 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4618 if skip_start_token: 4619 start = None 4620 elif self._match(TokenType.START_WITH): 4621 start = self._parse_assignment() 4622 else: 4623 return None 4624 4625 self._match(TokenType.CONNECT_BY) 4626 nocycle = self._match_text_seq("NOCYCLE") 4627 connect = self._parse_connect_with_prior() 4628 4629 if not start and self._match(TokenType.START_WITH): 4630 start = self._parse_assignment() 4631 4632 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4633 4634 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4635 this = self._parse_id_var(any_token=True) 4636 if self._match(TokenType.ALIAS): 4637 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4638 return this 4639 4640 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4641 if self._match_text_seq("INTERPOLATE"): 4642 return self._parse_wrapped_csv(self._parse_name_as_expression) 4643 return None 4644 4645 def _parse_order( 4646 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4647 ) -> t.Optional[exp.Expression]: 4648 siblings = None 4649 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4650 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4651 return this 4652 4653 siblings = True 4654 4655 return self.expression( 4656 exp.Order, 4657 comments=self._prev_comments, 4658 this=this, 4659 expressions=self._parse_csv(self._parse_ordered), 4660 siblings=siblings, 4661 ) 4662 4663 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4664 if not self._match(token): 4665 return None 4666 return self.expression(exp_class, 
expressions=self._parse_csv(self._parse_ordered)) 4667 4668 def _parse_ordered( 4669 self, parse_method: t.Optional[t.Callable] = None 4670 ) -> t.Optional[exp.Ordered]: 4671 this = parse_method() if parse_method else self._parse_assignment() 4672 if not this: 4673 return None 4674 4675 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4676 this = exp.var("ALL") 4677 4678 asc = self._match(TokenType.ASC) 4679 desc = self._match(TokenType.DESC) or (asc and False) 4680 4681 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4682 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4683 4684 nulls_first = is_nulls_first or False 4685 explicitly_null_ordered = is_nulls_first or is_nulls_last 4686 4687 if ( 4688 not explicitly_null_ordered 4689 and ( 4690 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4691 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4692 ) 4693 and self.dialect.NULL_ORDERING != "nulls_are_last" 4694 ): 4695 nulls_first = True 4696 4697 if self._match_text_seq("WITH", "FILL"): 4698 with_fill = self.expression( 4699 exp.WithFill, 4700 **{ # type: ignore 4701 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4702 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4703 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4704 "interpolate": self._parse_interpolate(), 4705 }, 4706 ) 4707 else: 4708 with_fill = None 4709 4710 return self.expression( 4711 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4712 ) 4713 4714 def _parse_limit_options(self) -> exp.LimitOptions: 4715 percent = self._match(TokenType.PERCENT) 4716 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4717 self._match_text_seq("ONLY") 4718 with_ties = self._match_text_seq("WITH", "TIES") 4719 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4720 4721 def _parse_limit( 4722 self, 4723 this: t.Optional[exp.Expression] = None, 4724 top: bool = False, 4725 skip_limit_token: bool = False, 4726 ) -> t.Optional[exp.Expression]: 4727 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4728 comments = self._prev_comments 4729 if top: 4730 limit_paren = self._match(TokenType.L_PAREN) 4731 expression = self._parse_term() if limit_paren else self._parse_number() 4732 4733 if limit_paren: 4734 self._match_r_paren() 4735 4736 limit_options = self._parse_limit_options() 4737 else: 4738 limit_options = None 4739 expression = self._parse_term() 4740 4741 if self._match(TokenType.COMMA): 4742 offset = expression 4743 expression = self._parse_term() 4744 else: 4745 offset = None 4746 4747 limit_exp = self.expression( 4748 exp.Limit, 4749 this=this, 4750 expression=expression, 4751 offset=offset, 4752 comments=comments, 4753 limit_options=limit_options, 4754 expressions=self._parse_limit_by(), 4755 ) 4756 4757 return limit_exp 4758 4759 if self._match(TokenType.FETCH): 4760 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4761 direction = self._prev.text.upper() if direction else "FIRST" 4762 4763 count = self._parse_field(tokens=self.FETCH_TOKENS) 4764 4765 return self.expression( 4766 exp.Fetch, 4767 direction=direction, 4768 count=count, 4769 limit_options=self._parse_limit_options(), 4770 ) 4771 4772 return this 4773 4774 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4775 if not self._match(TokenType.OFFSET): 4776 return this 4777 4778 count = self._parse_term() 4779 
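# (Editor's note) the optional ROW / ROWS keyword, as in "OFFSET 5 ROWS", is matched below
# and discarded rather than stored on the resulting exp.Offset.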
self._match_set((TokenType.ROW, TokenType.ROWS)) 4780 4781 return self.expression( 4782 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4783 ) 4784 4785 def _can_parse_limit_or_offset(self) -> bool: 4786 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4787 return False 4788 4789 index = self._index 4790 result = bool( 4791 self._try_parse(self._parse_limit, retreat=True) 4792 or self._try_parse(self._parse_offset, retreat=True) 4793 ) 4794 self._retreat(index) 4795 return result 4796 4797 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4798 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4799 4800 def _parse_locks(self) -> t.List[exp.Lock]: 4801 locks = [] 4802 while True: 4803 update, key = None, None 4804 if self._match_text_seq("FOR", "UPDATE"): 4805 update = True 4806 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4807 "LOCK", "IN", "SHARE", "MODE" 4808 ): 4809 update = False 4810 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4811 update, key = False, True 4812 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4813 update, key = True, True 4814 else: 4815 break 4816 4817 expressions = None 4818 if self._match_text_seq("OF"): 4819 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4820 4821 wait: t.Optional[bool | exp.Expression] = None 4822 if self._match_text_seq("NOWAIT"): 4823 wait = True 4824 elif self._match_text_seq("WAIT"): 4825 wait = self._parse_primary() 4826 elif self._match_text_seq("SKIP", "LOCKED"): 4827 wait = False 4828 4829 locks.append( 4830 self.expression( 4831 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4832 ) 4833 ) 4834 4835 return locks 4836 4837 def parse_set_operation( 4838 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4839 ) -> t.Optional[exp.Expression]: 4840 start = self._index 4841 _, side_token, kind_token = self._parse_join_parts() 4842 4843 side = side_token.text if side_token else None 4844 kind = kind_token.text if kind_token else None 4845 4846 if not self._match_set(self.SET_OPERATIONS): 4847 self._retreat(start) 4848 return None 4849 4850 token_type = self._prev.token_type 4851 4852 if token_type == TokenType.UNION: 4853 operation: t.Type[exp.SetOperation] = exp.Union 4854 elif token_type == TokenType.EXCEPT: 4855 operation = exp.Except 4856 else: 4857 operation = exp.Intersect 4858 4859 comments = self._prev.comments 4860 4861 if self._match(TokenType.DISTINCT): 4862 distinct: t.Optional[bool] = True 4863 elif self._match(TokenType.ALL): 4864 distinct = False 4865 else: 4866 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4867 if distinct is None: 4868 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4869 4870 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4871 "STRICT", "CORRESPONDING" 4872 ) 4873 if self._match_text_seq("CORRESPONDING"): 4874 by_name = True 4875 if not side and not kind: 4876 kind = "INNER" 4877 4878 on_column_list = None 4879 if by_name and self._match_texts(("ON", "BY")): 4880 on_column_list = self._parse_wrapped_csv(self._parse_column) 4881 4882 expression = self._parse_select( 4883 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4884 ) 4885 4886 return self.expression( 4887 operation, 4888 comments=comments, 4889 this=this, 4890 distinct=distinct, 4891 by_name=by_name, 4892 expression=expression, 4893 side=side, 4894 kind=kind, 4895 on=on_column_list, 4896 ) 4897 
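# Editor's illustration (not part of the original source), assuming the default dialect:
# when neither DISTINCT nor ALL follows the set operator, the distinct flag falls back to
# SET_OP_DISTINCT_BY_DEFAULT, so a bare UNION is parsed as a distinct union. For example:
#
#   import sqlglot
#   from sqlglot import exp
#
#   union = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
#   assert isinstance(union, exp.Union) and union.args.get("distinct") is True
#
#   union_all = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
#   assert union_all.args.get("distinct") is False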
4898 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4899 while this: 4900 setop = self.parse_set_operation(this) 4901 if not setop: 4902 break 4903 this = setop 4904 4905 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4906 expression = this.expression 4907 4908 if expression: 4909 for arg in self.SET_OP_MODIFIERS: 4910 expr = expression.args.get(arg) 4911 if expr: 4912 this.set(arg, expr.pop()) 4913 4914 return this 4915 4916 def _parse_expression(self) -> t.Optional[exp.Expression]: 4917 return self._parse_alias(self._parse_assignment()) 4918 4919 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4920 this = self._parse_disjunction() 4921 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4922 # This allows us to parse <non-identifier token> := <expr> 4923 this = exp.column( 4924 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4925 ) 4926 4927 while self._match_set(self.ASSIGNMENT): 4928 if isinstance(this, exp.Column) and len(this.parts) == 1: 4929 this = this.this 4930 4931 this = self.expression( 4932 self.ASSIGNMENT[self._prev.token_type], 4933 this=this, 4934 comments=self._prev_comments, 4935 expression=self._parse_assignment(), 4936 ) 4937 4938 return this 4939 4940 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4941 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4942 4943 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4944 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4945 4946 def _parse_equality(self) -> t.Optional[exp.Expression]: 4947 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4948 4949 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4950 return self._parse_tokens(self._parse_range, self.COMPARISON) 4951 4952 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4953 this = this or self._parse_bitwise() 4954 negate = self._match(TokenType.NOT) 4955 4956 if self._match_set(self.RANGE_PARSERS): 4957 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4958 if not expression: 4959 return this 4960 4961 this = expression 4962 elif self._match(TokenType.ISNULL): 4963 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4964 4965 # Postgres supports ISNULL and NOTNULL for conditions. 
4966 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4967 if self._match(TokenType.NOTNULL): 4968 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4969 this = self.expression(exp.Not, this=this) 4970 4971 if negate: 4972 this = self._negate_range(this) 4973 4974 if self._match(TokenType.IS): 4975 this = self._parse_is(this) 4976 4977 return this 4978 4979 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4980 if not this: 4981 return this 4982 4983 return self.expression(exp.Not, this=this) 4984 4985 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4986 index = self._index - 1 4987 negate = self._match(TokenType.NOT) 4988 4989 if self._match_text_seq("DISTINCT", "FROM"): 4990 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4991 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4992 4993 if self._match(TokenType.JSON): 4994 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4995 4996 if self._match_text_seq("WITH"): 4997 _with = True 4998 elif self._match_text_seq("WITHOUT"): 4999 _with = False 5000 else: 5001 _with = None 5002 5003 unique = self._match(TokenType.UNIQUE) 5004 self._match_text_seq("KEYS") 5005 expression: t.Optional[exp.Expression] = self.expression( 5006 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5007 ) 5008 else: 5009 expression = self._parse_primary() or self._parse_null() 5010 if not expression: 5011 self._retreat(index) 5012 return None 5013 5014 this = self.expression(exp.Is, this=this, expression=expression) 5015 return self.expression(exp.Not, this=this) if negate else this 5016 5017 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5018 unnest = self._parse_unnest(with_alias=False) 5019 if unnest: 5020 this = self.expression(exp.In, this=this, unnest=unnest) 5021 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5022 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5023 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5024 5025 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5026 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5027 else: 5028 this = self.expression(exp.In, this=this, expressions=expressions) 5029 5030 if matched_l_paren: 5031 self._match_r_paren(this) 5032 elif not self._match(TokenType.R_BRACKET, expression=this): 5033 self.raise_error("Expecting ]") 5034 else: 5035 this = self.expression(exp.In, this=this, field=self._parse_column()) 5036 5037 return this 5038 5039 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5040 symmetric = None 5041 if self._match_text_seq("SYMMETRIC"): 5042 symmetric = True 5043 elif self._match_text_seq("ASYMMETRIC"): 5044 symmetric = False 5045 5046 low = self._parse_bitwise() 5047 self._match(TokenType.AND) 5048 high = self._parse_bitwise() 5049 5050 return self.expression( 5051 exp.Between, 5052 this=this, 5053 low=low, 5054 high=high, 5055 symmetric=symmetric, 5056 ) 5057 5058 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5059 if not self._match(TokenType.ESCAPE): 5060 return this 5061 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5062 5063 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5064 index = self._index 5065 5066 if not 
self._match(TokenType.INTERVAL) and match_interval: 5067 return None 5068 5069 if self._match(TokenType.STRING, advance=False): 5070 this = self._parse_primary() 5071 else: 5072 this = self._parse_term() 5073 5074 if not this or ( 5075 isinstance(this, exp.Column) 5076 and not this.table 5077 and not this.this.quoted 5078 and this.name.upper() == "IS" 5079 ): 5080 self._retreat(index) 5081 return None 5082 5083 unit = self._parse_function() or ( 5084 not self._match(TokenType.ALIAS, advance=False) 5085 and self._parse_var(any_token=True, upper=True) 5086 ) 5087 5088 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5089 # each INTERVAL expression into this canonical form so it's easy to transpile 5090 if this and this.is_number: 5091 this = exp.Literal.string(this.to_py()) 5092 elif this and this.is_string: 5093 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5094 if parts and unit: 5095 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5096 unit = None 5097 self._retreat(self._index - 1) 5098 5099 if len(parts) == 1: 5100 this = exp.Literal.string(parts[0][0]) 5101 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5102 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5103 unit = self.expression( 5104 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5105 ) 5106 5107 interval = self.expression(exp.Interval, this=this, unit=unit) 5108 5109 index = self._index 5110 self._match(TokenType.PLUS) 5111 5112 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5113 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5114 return self.expression( 5115 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5116 ) 5117 5118 self._retreat(index) 5119 return interval 5120 5121 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5122 this = self._parse_term() 5123 5124 while True: 5125 if self._match_set(self.BITWISE): 5126 this = self.expression( 5127 self.BITWISE[self._prev.token_type], 5128 this=this, 5129 expression=self._parse_term(), 5130 ) 5131 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5132 this = self.expression( 5133 exp.DPipe, 5134 this=this, 5135 expression=self._parse_term(), 5136 safe=not self.dialect.STRICT_STRING_CONCAT, 5137 ) 5138 elif self._match(TokenType.DQMARK): 5139 this = self.expression( 5140 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5141 ) 5142 elif self._match_pair(TokenType.LT, TokenType.LT): 5143 this = self.expression( 5144 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5145 ) 5146 elif self._match_pair(TokenType.GT, TokenType.GT): 5147 this = self.expression( 5148 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5149 ) 5150 else: 5151 break 5152 5153 return this 5154 5155 def _parse_term(self) -> t.Optional[exp.Expression]: 5156 this = self._parse_factor() 5157 5158 while self._match_set(self.TERM): 5159 klass = self.TERM[self._prev.token_type] 5160 comments = self._prev_comments 5161 expression = self._parse_factor() 5162 5163 this = self.expression(klass, this=this, comments=comments, expression=expression) 5164 5165 if isinstance(this, exp.Collate): 5166 expr = this.expression 5167 5168 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5169 # fallback to Identifier / Var 5170 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5171 ident = expr.this 5172 if 
isinstance(ident, exp.Identifier): 5173 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5174 5175 return this 5176 5177 def _parse_factor(self) -> t.Optional[exp.Expression]: 5178 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5179 this = parse_method() 5180 5181 while self._match_set(self.FACTOR): 5182 klass = self.FACTOR[self._prev.token_type] 5183 comments = self._prev_comments 5184 expression = parse_method() 5185 5186 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5187 self._retreat(self._index - 1) 5188 return this 5189 5190 this = self.expression(klass, this=this, comments=comments, expression=expression) 5191 5192 if isinstance(this, exp.Div): 5193 this.args["typed"] = self.dialect.TYPED_DIVISION 5194 this.args["safe"] = self.dialect.SAFE_DIVISION 5195 5196 return this 5197 5198 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5199 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5200 5201 def _parse_unary(self) -> t.Optional[exp.Expression]: 5202 if self._match_set(self.UNARY_PARSERS): 5203 return self.UNARY_PARSERS[self._prev.token_type](self) 5204 return self._parse_at_time_zone(self._parse_type()) 5205 5206 def _parse_type( 5207 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5208 ) -> t.Optional[exp.Expression]: 5209 interval = parse_interval and self._parse_interval() 5210 if interval: 5211 return interval 5212 5213 index = self._index 5214 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5215 5216 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5217 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5218 if isinstance(data_type, exp.Cast): 5219 # This constructor can contain ops directly after it, for instance struct unnesting: 5220 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5221 return self._parse_column_ops(data_type) 5222 5223 if data_type: 5224 index2 = self._index 5225 this = self._parse_primary() 5226 5227 if isinstance(this, exp.Literal): 5228 literal = this.name 5229 this = self._parse_column_ops(this) 5230 5231 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5232 if parser: 5233 return parser(self, this, data_type) 5234 5235 if ( 5236 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5237 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5238 and TIME_ZONE_RE.search(literal) 5239 ): 5240 data_type = exp.DataType.build("TIMESTAMPTZ") 5241 5242 return self.expression(exp.Cast, this=this, to=data_type) 5243 5244 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5245 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5246 # 5247 # If the index difference here is greater than 1, that means the parser itself must have 5248 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5249 # 5250 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5251 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5252 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5253 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5254 # 5255 # In these cases, we don't really want to return the converted type, but instead retreat 5256 # and try to parse a Column or Identifier in the section below. 
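# A minimal illustration of the inline-constructor branch above, seen through the
# public API (the dialect name and literal values are assumptions for the example):
#
#     >>> import sqlglot
#     >>> node = sqlglot.parse_one("STRUCT<a INT, b STRING>(1, 'foo')", read="bigquery")
#     >>> isinstance(node, sqlglot.exp.Cast)
#     True
#
# whereas, when only a TYPE_CONVERTERS callable populated the expressions arg, the
# parser retreats below and re-parses the token as a Column or Identifier instead.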
5257 if data_type.expressions and index2 - index > 1: 5258 self._retreat(index2) 5259 return self._parse_column_ops(data_type) 5260 5261 self._retreat(index) 5262 5263 if fallback_to_identifier: 5264 return self._parse_id_var() 5265 5266 this = self._parse_column() 5267 return this and self._parse_column_ops(this) 5268 5269 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5270 this = self._parse_type() 5271 if not this: 5272 return None 5273 5274 if isinstance(this, exp.Column) and not this.table: 5275 this = exp.var(this.name.upper()) 5276 5277 return self.expression( 5278 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5279 ) 5280 5281 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5282 type_name = identifier.name 5283 5284 while self._match(TokenType.DOT): 5285 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5286 5287 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5288 5289 def _parse_types( 5290 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5291 ) -> t.Optional[exp.Expression]: 5292 index = self._index 5293 5294 this: t.Optional[exp.Expression] = None 5295 prefix = self._match_text_seq("SYSUDTLIB", ".") 5296 5297 if not self._match_set(self.TYPE_TOKENS): 5298 identifier = allow_identifiers and self._parse_id_var( 5299 any_token=False, tokens=(TokenType.VAR,) 5300 ) 5301 if isinstance(identifier, exp.Identifier): 5302 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5303 5304 if len(tokens) != 1: 5305 self.raise_error("Unexpected identifier", self._prev) 5306 5307 if tokens[0].token_type in self.TYPE_TOKENS: 5308 self._prev = tokens[0] 5309 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5310 this = self._parse_user_defined_type(identifier) 5311 else: 5312 self._retreat(self._index - 1) 5313 return None 5314 else: 5315 return None 5316 5317 type_token = self._prev.token_type 5318 5319 if type_token == TokenType.PSEUDO_TYPE: 5320 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5321 5322 if type_token == TokenType.OBJECT_IDENTIFIER: 5323 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5324 5325 # https://materialize.com/docs/sql/types/map/ 5326 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5327 key_type = self._parse_types( 5328 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5329 ) 5330 if not self._match(TokenType.FARROW): 5331 self._retreat(index) 5332 return None 5333 5334 value_type = self._parse_types( 5335 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5336 ) 5337 if not self._match(TokenType.R_BRACKET): 5338 self._retreat(index) 5339 return None 5340 5341 return exp.DataType( 5342 this=exp.DataType.Type.MAP, 5343 expressions=[key_type, value_type], 5344 nested=True, 5345 prefix=prefix, 5346 ) 5347 5348 nested = type_token in self.NESTED_TYPE_TOKENS 5349 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5350 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5351 expressions = None 5352 maybe_func = False 5353 5354 if self._match(TokenType.L_PAREN): 5355 if is_struct: 5356 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5357 elif nested: 5358 expressions = self._parse_csv( 5359 lambda: self._parse_types( 5360 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5361 ) 5362 ) 5363 if type_token == TokenType.NULLABLE 
and len(expressions) == 1: 5364 this = expressions[0] 5365 this.set("nullable", True) 5366 self._match_r_paren() 5367 return this 5368 elif type_token in self.ENUM_TYPE_TOKENS: 5369 expressions = self._parse_csv(self._parse_equality) 5370 elif is_aggregate: 5371 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5372 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5373 ) 5374 if not func_or_ident: 5375 return None 5376 expressions = [func_or_ident] 5377 if self._match(TokenType.COMMA): 5378 expressions.extend( 5379 self._parse_csv( 5380 lambda: self._parse_types( 5381 check_func=check_func, 5382 schema=schema, 5383 allow_identifiers=allow_identifiers, 5384 ) 5385 ) 5386 ) 5387 else: 5388 expressions = self._parse_csv(self._parse_type_size) 5389 5390 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5391 if type_token == TokenType.VECTOR and len(expressions) == 2: 5392 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5393 5394 if not expressions or not self._match(TokenType.R_PAREN): 5395 self._retreat(index) 5396 return None 5397 5398 maybe_func = True 5399 5400 values: t.Optional[t.List[exp.Expression]] = None 5401 5402 if nested and self._match(TokenType.LT): 5403 if is_struct: 5404 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5405 else: 5406 expressions = self._parse_csv( 5407 lambda: self._parse_types( 5408 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5409 ) 5410 ) 5411 5412 if not self._match(TokenType.GT): 5413 self.raise_error("Expecting >") 5414 5415 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5416 values = self._parse_csv(self._parse_assignment) 5417 if not values and is_struct: 5418 values = None 5419 self._retreat(self._index - 1) 5420 else: 5421 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5422 5423 if type_token in self.TIMESTAMPS: 5424 if self._match_text_seq("WITH", "TIME", "ZONE"): 5425 maybe_func = False 5426 tz_type = ( 5427 exp.DataType.Type.TIMETZ 5428 if type_token in self.TIMES 5429 else exp.DataType.Type.TIMESTAMPTZ 5430 ) 5431 this = exp.DataType(this=tz_type, expressions=expressions) 5432 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5433 maybe_func = False 5434 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5435 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5436 maybe_func = False 5437 elif type_token == TokenType.INTERVAL: 5438 unit = self._parse_var(upper=True) 5439 if unit: 5440 if self._match_text_seq("TO"): 5441 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5442 5443 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5444 else: 5445 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5446 elif type_token == TokenType.VOID: 5447 this = exp.DataType(this=exp.DataType.Type.NULL) 5448 5449 if maybe_func and check_func: 5450 index2 = self._index 5451 peek = self._parse_string() 5452 5453 if not peek: 5454 self._retreat(index) 5455 return None 5456 5457 self._retreat(index2) 5458 5459 if not this: 5460 if self._match_text_seq("UNSIGNED"): 5461 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5462 if not unsigned_type_token: 5463 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5464 5465 type_token = unsigned_type_token or type_token 5466 5467 this = exp.DataType( 5468 this=exp.DataType.Type[type_token.value], 5469 
expressions=expressions, 5470 nested=nested, 5471 prefix=prefix, 5472 ) 5473 5474 # Empty arrays/structs are allowed 5475 if values is not None: 5476 cls = exp.Struct if is_struct else exp.Array 5477 this = exp.cast(cls(expressions=values), this, copy=False) 5478 5479 elif expressions: 5480 this.set("expressions", expressions) 5481 5482 # https://materialize.com/docs/sql/types/list/#type-name 5483 while self._match(TokenType.LIST): 5484 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5485 5486 index = self._index 5487 5488 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5489 matched_array = self._match(TokenType.ARRAY) 5490 5491 while self._curr: 5492 datatype_token = self._prev.token_type 5493 matched_l_bracket = self._match(TokenType.L_BRACKET) 5494 5495 if (not matched_l_bracket and not matched_array) or ( 5496 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5497 ): 5498 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5499 # not to be confused with the fixed size array parsing 5500 break 5501 5502 matched_array = False 5503 values = self._parse_csv(self._parse_assignment) or None 5504 if ( 5505 values 5506 and not schema 5507 and ( 5508 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5509 ) 5510 ): 5511 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5512 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5513 self._retreat(index) 5514 break 5515 5516 this = exp.DataType( 5517 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5518 ) 5519 self._match(TokenType.R_BRACKET) 5520 5521 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5522 converter = self.TYPE_CONVERTERS.get(this.this) 5523 if converter: 5524 this = converter(t.cast(exp.DataType, this)) 5525 5526 return this 5527 5528 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5529 index = self._index 5530 5531 if ( 5532 self._curr 5533 and self._next 5534 and self._curr.token_type in self.TYPE_TOKENS 5535 and self._next.token_type in self.TYPE_TOKENS 5536 ): 5537 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5538 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5539 this = self._parse_id_var() 5540 else: 5541 this = ( 5542 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5543 or self._parse_id_var() 5544 ) 5545 5546 self._match(TokenType.COLON) 5547 5548 if ( 5549 type_required 5550 and not isinstance(this, exp.DataType) 5551 and not self._match_set(self.TYPE_TOKENS, advance=False) 5552 ): 5553 self._retreat(index) 5554 return self._parse_types() 5555 5556 return self._parse_column_def(this) 5557 5558 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5559 if not self._match_text_seq("AT", "TIME", "ZONE"): 5560 return this 5561 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5562 5563 def _parse_column(self) -> t.Optional[exp.Expression]: 5564 this = self._parse_column_reference() 5565 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5566 5567 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5568 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5569 5570 return column 5571 5572 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5573 this = self._parse_field() 5574 if ( 5575 not this 5576 and self._match(TokenType.VALUES, advance=False) 5577 and self.VALUES_FOLLOWED_BY_PAREN 5578 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5579 ): 5580 this = self._parse_id_var() 5581 5582 if isinstance(this, exp.Identifier): 5583 # We bubble up comments from the Identifier to the Column 5584 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5585 5586 return this 5587 5588 def _parse_colon_as_variant_extract( 5589 self, this: t.Optional[exp.Expression] 5590 ) -> t.Optional[exp.Expression]: 5591 casts = [] 5592 json_path = [] 5593 escape = None 5594 5595 while self._match(TokenType.COLON): 5596 start_index = self._index 5597 5598 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5599 path = self._parse_column_ops( 5600 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5601 ) 5602 5603 # The cast :: operator has a lower precedence than the extraction operator :, so 5604 # we rearrange the AST appropriately to avoid casting the JSON path 5605 while isinstance(path, exp.Cast): 5606 casts.append(path.to) 5607 path = path.this 5608 5609 if casts: 5610 dcolon_offset = next( 5611 i 5612 for i, t in enumerate(self._tokens[start_index:]) 5613 if t.token_type == TokenType.DCOLON 5614 ) 5615 end_token = self._tokens[start_index + dcolon_offset - 1] 5616 else: 5617 end_token = self._prev 5618 5619 if path: 5620 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5621 # it'll roundtrip to a string literal in GET_PATH 5622 if isinstance(path, exp.Identifier) and path.quoted: 5623 escape = True 5624 5625 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5626 5627 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5628 # Databricks transforms it back to the colon/dot notation 5629 if json_path: 5630 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5631 5632 if json_path_expr: 5633 json_path_expr.set("escape", escape) 5634 5635 this = self.expression( 5636 exp.JSONExtract, 5637 this=this, 5638 expression=json_path_expr, 5639 variant_extract=True, 5640 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5641 ) 5642 5643 while casts: 5644 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5645 5646 return this 5647 5648 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5649 return self._parse_types() 5650 5651 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5652 this = self._parse_bracket(this) 5653 5654 while self._match_set(self.COLUMN_OPERATORS): 5655 op_token = self._prev.token_type 5656 op = self.COLUMN_OPERATORS.get(op_token) 5657 5658 if op_token in self.CAST_COLUMN_OPERATORS: 5659 field = self._parse_dcolon() 5660 if not field: 5661 self.raise_error("Expected type") 5662 elif op and self._curr: 5663 field = self._parse_column_reference() or self._parse_bracket() 5664 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5665 field = self._parse_column_ops(field) 5666 else: 5667 field = self._parse_field(any_token=True, anonymous_func=True) 5668 5669 # Function calls can be qualified, e.g., x.y.FOO() 5670 # This converts the final AST to a series of Dots leading to the function call 5671 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5672 if isinstance(field, (exp.Func, exp.Window)) and this: 5673 this = this.transform( 5674 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5675 ) 5676 5677 if op: 5678 this = op(self, this, field) 5679 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5680 this = self.expression( 5681 exp.Column, 5682 comments=this.comments, 5683 this=field, 5684 table=this.this, 5685 db=this.args.get("table"), 5686 catalog=this.args.get("db"), 5687 ) 5688 elif isinstance(field, exp.Window): 5689 # Move the exp.Dot's to the window's function 5690 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5691 field.set("this", window_func) 5692 this = field 5693 else: 5694 this = self.expression(exp.Dot, this=this, expression=field) 5695 5696 if field and field.comments: 5697 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5698 5699 this = self._parse_bracket(this) 5700 5701 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5702 5703 def _parse_paren(self) -> t.Optional[exp.Expression]: 5704 if not self._match(TokenType.L_PAREN): 5705 return None 5706 5707 comments = self._prev_comments 5708 query = self._parse_select() 5709 5710 if query: 5711 expressions = [query] 5712 else: 5713 expressions = self._parse_expressions() 5714 5715 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5716 5717 if not this and self._match(TokenType.R_PAREN, advance=False): 5718 this = self.expression(exp.Tuple) 5719 elif isinstance(this, 
exp.UNWRAPPED_QUERIES): 5720 this = self._parse_subquery(this=this, parse_alias=False) 5721 elif isinstance(this, exp.Subquery): 5722 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5723 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5724 this = self.expression(exp.Tuple, expressions=expressions) 5725 else: 5726 this = self.expression(exp.Paren, this=this) 5727 5728 if this: 5729 this.add_comments(comments) 5730 5731 self._match_r_paren(expression=this) 5732 return this 5733 5734 def _parse_primary(self) -> t.Optional[exp.Expression]: 5735 if self._match_set(self.PRIMARY_PARSERS): 5736 token_type = self._prev.token_type 5737 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5738 5739 if token_type == TokenType.STRING: 5740 expressions = [primary] 5741 while self._match(TokenType.STRING): 5742 expressions.append(exp.Literal.string(self._prev.text)) 5743 5744 if len(expressions) > 1: 5745 return self.expression(exp.Concat, expressions=expressions) 5746 5747 return primary 5748 5749 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5750 return exp.Literal.number(f"0.{self._prev.text}") 5751 5752 return self._parse_paren() 5753 5754 def _parse_field( 5755 self, 5756 any_token: bool = False, 5757 tokens: t.Optional[t.Collection[TokenType]] = None, 5758 anonymous_func: bool = False, 5759 ) -> t.Optional[exp.Expression]: 5760 if anonymous_func: 5761 field = ( 5762 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5763 or self._parse_primary() 5764 ) 5765 else: 5766 field = self._parse_primary() or self._parse_function( 5767 anonymous=anonymous_func, any_token=any_token 5768 ) 5769 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5770 5771 def _parse_function( 5772 self, 5773 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5774 anonymous: bool = False, 5775 optional_parens: bool = True, 5776 any_token: bool = False, 5777 ) -> t.Optional[exp.Expression]: 5778 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5779 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5780 fn_syntax = False 5781 if ( 5782 self._match(TokenType.L_BRACE, advance=False) 5783 and self._next 5784 and self._next.text.upper() == "FN" 5785 ): 5786 self._advance(2) 5787 fn_syntax = True 5788 5789 func = self._parse_function_call( 5790 functions=functions, 5791 anonymous=anonymous, 5792 optional_parens=optional_parens, 5793 any_token=any_token, 5794 ) 5795 5796 if fn_syntax: 5797 self._match(TokenType.R_BRACE) 5798 5799 return func 5800 5801 def _parse_function_call( 5802 self, 5803 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5804 anonymous: bool = False, 5805 optional_parens: bool = True, 5806 any_token: bool = False, 5807 ) -> t.Optional[exp.Expression]: 5808 if not self._curr: 5809 return None 5810 5811 comments = self._curr.comments 5812 prev = self._prev 5813 token = self._curr 5814 token_type = self._curr.token_type 5815 this = self._curr.text 5816 upper = this.upper() 5817 5818 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5819 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5820 self._advance() 5821 return self._parse_window(parser(self)) 5822 5823 if not self._next or self._next.token_type != TokenType.L_PAREN: 5824 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5825 self._advance() 5826 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5827 5828 return None 5829 5830 if 
any_token: 5831 if token_type in self.RESERVED_TOKENS: 5832 return None 5833 elif token_type not in self.FUNC_TOKENS: 5834 return None 5835 5836 self._advance(2) 5837 5838 parser = self.FUNCTION_PARSERS.get(upper) 5839 if parser and not anonymous: 5840 this = parser(self) 5841 else: 5842 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5843 5844 if subquery_predicate: 5845 expr = None 5846 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5847 expr = self._parse_select() 5848 self._match_r_paren() 5849 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5850 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5851 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5852 self._advance(-1) 5853 expr = self._parse_bitwise() 5854 5855 if expr: 5856 return self.expression(subquery_predicate, comments=comments, this=expr) 5857 5858 if functions is None: 5859 functions = self.FUNCTIONS 5860 5861 function = functions.get(upper) 5862 known_function = function and not anonymous 5863 5864 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5865 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5866 5867 post_func_comments = self._curr and self._curr.comments 5868 if known_function and post_func_comments: 5869 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5870 # call we'll construct it as exp.Anonymous, even if it's "known" 5871 if any( 5872 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5873 for comment in post_func_comments 5874 ): 5875 known_function = False 5876 5877 if alias and known_function: 5878 args = self._kv_to_prop_eq(args) 5879 5880 if known_function: 5881 func_builder = t.cast(t.Callable, function) 5882 5883 if "dialect" in func_builder.__code__.co_varnames: 5884 func = func_builder(args, dialect=self.dialect) 5885 else: 5886 func = func_builder(args) 5887 5888 func = self.validate_expression(func, args) 5889 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5890 func.meta["name"] = this 5891 5892 this = func 5893 else: 5894 if token_type == TokenType.IDENTIFIER: 5895 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5896 5897 this = self.expression(exp.Anonymous, this=this, expressions=args) 5898 this = this.update_positions(token) 5899 5900 if isinstance(this, exp.Expression): 5901 this.add_comments(comments) 5902 5903 self._match_r_paren(this) 5904 return self._parse_window(this) 5905 5906 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5907 return expression 5908 5909 def _kv_to_prop_eq( 5910 self, expressions: t.List[exp.Expression], parse_map: bool = False 5911 ) -> t.List[exp.Expression]: 5912 transformed = [] 5913 5914 for index, e in enumerate(expressions): 5915 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5916 if isinstance(e, exp.Alias): 5917 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5918 5919 if not isinstance(e, exp.PropertyEQ): 5920 e = self.expression( 5921 exp.PropertyEQ, 5922 this=e.this if parse_map else exp.to_identifier(e.this.name), 5923 expression=e.expression, 5924 ) 5925 5926 if isinstance(e.this, exp.Column): 5927 e.this.replace(e.this.this) 5928 else: 5929 e = self._to_prop_eq(e, index) 5930 5931 transformed.append(e) 5932 5933 return transformed 5934 5935 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5936 return self._parse_statement() 5937 5938 def 
_parse_function_parameter(self) -> t.Optional[exp.Expression]: 5939 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5940 5941 def _parse_user_defined_function( 5942 self, kind: t.Optional[TokenType] = None 5943 ) -> t.Optional[exp.Expression]: 5944 this = self._parse_table_parts(schema=True) 5945 5946 if not self._match(TokenType.L_PAREN): 5947 return this 5948 5949 expressions = self._parse_csv(self._parse_function_parameter) 5950 self._match_r_paren() 5951 return self.expression( 5952 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5953 ) 5954 5955 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5956 literal = self._parse_primary() 5957 if literal: 5958 return self.expression(exp.Introducer, this=token.text, expression=literal) 5959 5960 return self._identifier_expression(token) 5961 5962 def _parse_session_parameter(self) -> exp.SessionParameter: 5963 kind = None 5964 this = self._parse_id_var() or self._parse_primary() 5965 5966 if this and self._match(TokenType.DOT): 5967 kind = this.name 5968 this = self._parse_var() or self._parse_primary() 5969 5970 return self.expression(exp.SessionParameter, this=this, kind=kind) 5971 5972 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5973 return self._parse_id_var() 5974 5975 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5976 index = self._index 5977 5978 if self._match(TokenType.L_PAREN): 5979 expressions = t.cast( 5980 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5981 ) 5982 5983 if not self._match(TokenType.R_PAREN): 5984 self._retreat(index) 5985 else: 5986 expressions = [self._parse_lambda_arg()] 5987 5988 if self._match_set(self.LAMBDAS): 5989 return self.LAMBDAS[self._prev.token_type](self, expressions) 5990 5991 self._retreat(index) 5992 5993 this: t.Optional[exp.Expression] 5994 5995 if self._match(TokenType.DISTINCT): 5996 this = self.expression( 5997 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5998 ) 5999 else: 6000 this = self._parse_select_or_expression(alias=alias) 6001 6002 return self._parse_limit( 6003 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6004 ) 6005 6006 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6007 index = self._index 6008 if not self._match(TokenType.L_PAREN): 6009 return this 6010 6011 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6012 # expr can be of both types 6013 if self._match_set(self.SELECT_START_TOKENS): 6014 self._retreat(index) 6015 return this 6016 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6017 self._match_r_paren() 6018 return self.expression(exp.Schema, this=this, expressions=args) 6019 6020 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6021 return self._parse_column_def(self._parse_field(any_token=True)) 6022 6023 def _parse_column_def( 6024 self, this: t.Optional[exp.Expression], computed_column: bool = True 6025 ) -> t.Optional[exp.Expression]: 6026 # column defs are not really columns, they're identifiers 6027 if isinstance(this, exp.Column): 6028 this = this.this 6029 6030 if not computed_column: 6031 self._match(TokenType.ALIAS) 6032 6033 kind = self._parse_types(schema=True) 6034 6035 if self._match_text_seq("FOR", "ORDINALITY"): 6036 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6037 6038 constraints: t.List[exp.Expression] = [] 6039 6040 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6041 ("ALIAS", "MATERIALIZED") 6042 ): 6043 persisted = self._prev.text.upper() == "MATERIALIZED" 6044 constraint_kind = exp.ComputedColumnConstraint( 6045 this=self._parse_assignment(), 6046 persisted=persisted or self._match_text_seq("PERSISTED"), 6047 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6048 ) 6049 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6050 elif ( 6051 kind 6052 and self._match(TokenType.ALIAS, advance=False) 6053 and ( 6054 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6055 or (self._next and self._next.token_type == TokenType.L_PAREN) 6056 ) 6057 ): 6058 self._advance() 6059 constraints.append( 6060 self.expression( 6061 exp.ColumnConstraint, 6062 kind=exp.ComputedColumnConstraint( 6063 this=self._parse_disjunction(), 6064 persisted=self._match_texts(("STORED", "VIRTUAL")) 6065 and self._prev.text.upper() == "STORED", 6066 ), 6067 ) 6068 ) 6069 6070 while True: 6071 constraint = self._parse_column_constraint() 6072 if not constraint: 6073 break 6074 constraints.append(constraint) 6075 6076 if not kind and not constraints: 6077 return this 6078 6079 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6080 6081 def _parse_auto_increment( 6082 self, 6083 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6084 start = None 6085 increment = None 6086 order = None 6087 6088 if self._match(TokenType.L_PAREN, advance=False): 6089 args = self._parse_wrapped_csv(self._parse_bitwise) 6090 start = seq_get(args, 0) 6091 increment = seq_get(args, 1) 6092 elif self._match_text_seq("START"): 6093 start = self._parse_bitwise() 6094 self._match_text_seq("INCREMENT") 6095 increment = self._parse_bitwise() 6096 if self._match_text_seq("ORDER"): 6097 order = True 6098 elif self._match_text_seq("NOORDER"): 6099 order = False 6100 6101 if start and increment: 6102 return exp.GeneratedAsIdentityColumnConstraint( 6103 start=start, increment=increment, this=False, order=order 6104 ) 6105 6106 return exp.AutoIncrementColumnConstraint() 6107 6108 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6109 if not self._match_text_seq("REFRESH"): 6110 self._retreat(self._index - 1) 6111 return None 6112 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6113 6114 def _parse_compress(self) -> exp.CompressColumnConstraint: 6115 if 
self._match(TokenType.L_PAREN, advance=False): 6116 return self.expression( 6117 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6118 ) 6119 6120 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6121 6122 def _parse_generated_as_identity( 6123 self, 6124 ) -> ( 6125 exp.GeneratedAsIdentityColumnConstraint 6126 | exp.ComputedColumnConstraint 6127 | exp.GeneratedAsRowColumnConstraint 6128 ): 6129 if self._match_text_seq("BY", "DEFAULT"): 6130 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6131 this = self.expression( 6132 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6133 ) 6134 else: 6135 self._match_text_seq("ALWAYS") 6136 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6137 6138 self._match(TokenType.ALIAS) 6139 6140 if self._match_text_seq("ROW"): 6141 start = self._match_text_seq("START") 6142 if not start: 6143 self._match(TokenType.END) 6144 hidden = self._match_text_seq("HIDDEN") 6145 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6146 6147 identity = self._match_text_seq("IDENTITY") 6148 6149 if self._match(TokenType.L_PAREN): 6150 if self._match(TokenType.START_WITH): 6151 this.set("start", self._parse_bitwise()) 6152 if self._match_text_seq("INCREMENT", "BY"): 6153 this.set("increment", self._parse_bitwise()) 6154 if self._match_text_seq("MINVALUE"): 6155 this.set("minvalue", self._parse_bitwise()) 6156 if self._match_text_seq("MAXVALUE"): 6157 this.set("maxvalue", self._parse_bitwise()) 6158 6159 if self._match_text_seq("CYCLE"): 6160 this.set("cycle", True) 6161 elif self._match_text_seq("NO", "CYCLE"): 6162 this.set("cycle", False) 6163 6164 if not identity: 6165 this.set("expression", self._parse_range()) 6166 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6167 args = self._parse_csv(self._parse_bitwise) 6168 this.set("start", seq_get(args, 0)) 6169 this.set("increment", seq_get(args, 1)) 6170 6171 self._match_r_paren() 6172 6173 return this 6174 6175 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6176 self._match_text_seq("LENGTH") 6177 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6178 6179 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6180 if self._match_text_seq("NULL"): 6181 return self.expression(exp.NotNullColumnConstraint) 6182 if self._match_text_seq("CASESPECIFIC"): 6183 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6184 if self._match_text_seq("FOR", "REPLICATION"): 6185 return self.expression(exp.NotForReplicationColumnConstraint) 6186 6187 # Unconsume the `NOT` token 6188 self._retreat(self._index - 1) 6189 return None 6190 6191 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6192 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6193 6194 procedure_option_follows = ( 6195 self._match(TokenType.WITH, advance=False) 6196 and self._next 6197 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6198 ) 6199 6200 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6201 return self.expression( 6202 exp.ColumnConstraint, 6203 this=this, 6204 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6205 ) 6206 6207 return this 6208 6209 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6210 if not self._match(TokenType.CONSTRAINT): 6211 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6212 6213 return self.expression( 6214 exp.Constraint, 6215 this=self._parse_id_var(), 6216 expressions=self._parse_unnamed_constraints(), 6217 ) 6218 6219 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6220 constraints = [] 6221 while True: 6222 constraint = self._parse_unnamed_constraint() or self._parse_function() 6223 if not constraint: 6224 break 6225 constraints.append(constraint) 6226 6227 return constraints 6228 6229 def _parse_unnamed_constraint( 6230 self, constraints: t.Optional[t.Collection[str]] = None 6231 ) -> t.Optional[exp.Expression]: 6232 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6233 constraints or self.CONSTRAINT_PARSERS 6234 ): 6235 return None 6236 6237 constraint = self._prev.text.upper() 6238 if constraint not in self.CONSTRAINT_PARSERS: 6239 self.raise_error(f"No parser found for schema constraint {constraint}.") 6240 6241 return self.CONSTRAINT_PARSERS[constraint](self) 6242 6243 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6244 return self._parse_id_var(any_token=False) 6245 6246 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6247 self._match_texts(("KEY", "INDEX")) 6248 return self.expression( 6249 exp.UniqueColumnConstraint, 6250 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6251 this=self._parse_schema(self._parse_unique_key()), 6252 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6253 on_conflict=self._parse_on_conflict(), 6254 options=self._parse_key_constraint_options(), 6255 ) 6256 6257 def _parse_key_constraint_options(self) -> t.List[str]: 6258 options = [] 6259 while True: 6260 if not self._curr: 6261 break 6262 6263 if self._match(TokenType.ON): 6264 action = None 6265 on = self._advance_any() and self._prev.text 6266 6267 if self._match_text_seq("NO", "ACTION"): 6268 action = "NO ACTION" 6269 elif self._match_text_seq("CASCADE"): 6270 action = "CASCADE" 6271 elif self._match_text_seq("RESTRICT"): 6272 action = "RESTRICT" 6273 elif self._match_pair(TokenType.SET, TokenType.NULL): 6274 action = "SET NULL" 6275 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6276 action = "SET DEFAULT" 6277 else: 6278 self.raise_error("Invalid key constraint") 6279 6280 options.append(f"ON {on} {action}") 6281 else: 6282 var = self._parse_var_from_options( 6283 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6284 ) 6285 if not var: 6286 break 6287 options.append(var.name) 6288 6289 return options 6290 6291 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6292 if match and not self._match(TokenType.REFERENCES): 6293 return None 6294 6295 expressions = None 6296 this = self._parse_table(schema=True) 6297 options = self._parse_key_constraint_options() 6298 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6299 6300 def _parse_foreign_key(self) -> exp.ForeignKey: 6301 expressions = ( 6302 self._parse_wrapped_id_vars() 6303 if not self._match(TokenType.REFERENCES, advance=False) 6304 else None 6305 ) 6306 reference = self._parse_references() 6307 on_options = {} 6308 6309 while self._match(TokenType.ON): 6310 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6311 self.raise_error("Expected DELETE or UPDATE") 6312 6313 kind = self._prev.text.lower() 6314 6315 if self._match_text_seq("NO", "ACTION"): 6316 action = "NO ACTION" 6317 elif self._match(TokenType.SET): 6318 
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6319 action = "SET " + self._prev.text.upper() 6320 else: 6321 self._advance() 6322 action = self._prev.text.upper() 6323 6324 on_options[kind] = action 6325 6326 return self.expression( 6327 exp.ForeignKey, 6328 expressions=expressions, 6329 reference=reference, 6330 options=self._parse_key_constraint_options(), 6331 **on_options, # type: ignore 6332 ) 6333 6334 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6335 return self._parse_ordered() or self._parse_field() 6336 6337 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6338 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6339 self._retreat(self._index - 1) 6340 return None 6341 6342 id_vars = self._parse_wrapped_id_vars() 6343 return self.expression( 6344 exp.PeriodForSystemTimeConstraint, 6345 this=seq_get(id_vars, 0), 6346 expression=seq_get(id_vars, 1), 6347 ) 6348 6349 def _parse_primary_key( 6350 self, wrapped_optional: bool = False, in_props: bool = False 6351 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6352 desc = ( 6353 self._match_set((TokenType.ASC, TokenType.DESC)) 6354 and self._prev.token_type == TokenType.DESC 6355 ) 6356 6357 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6358 return self.expression( 6359 exp.PrimaryKeyColumnConstraint, 6360 desc=desc, 6361 options=self._parse_key_constraint_options(), 6362 ) 6363 6364 expressions = self._parse_wrapped_csv( 6365 self._parse_primary_key_part, optional=wrapped_optional 6366 ) 6367 6368 return self.expression( 6369 exp.PrimaryKey, 6370 expressions=expressions, 6371 include=self._parse_index_params(), 6372 options=self._parse_key_constraint_options(), 6373 ) 6374 6375 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6376 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6377 6378 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6379 """ 6380 Parses a datetime column in ODBC format. We parse the column into the corresponding 6381 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6382 same as we did for `DATE('yyyy-mm-dd')`. 
6383 6384 Reference: 6385 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6386 """ 6387 self._match(TokenType.VAR) 6388 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6389 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6390 if not self._match(TokenType.R_BRACE): 6391 self.raise_error("Expected }") 6392 return expression 6393 6394 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6395 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6396 return this 6397 6398 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6399 map_token = seq_get(self._tokens, self._index - 2) 6400 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6401 else: 6402 parse_map = False 6403 6404 bracket_kind = self._prev.token_type 6405 if ( 6406 bracket_kind == TokenType.L_BRACE 6407 and self._curr 6408 and self._curr.token_type == TokenType.VAR 6409 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6410 ): 6411 return self._parse_odbc_datetime_literal() 6412 6413 expressions = self._parse_csv( 6414 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6415 ) 6416 6417 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6418 self.raise_error("Expected ]") 6419 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6420 self.raise_error("Expected }") 6421 6422 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6423 if bracket_kind == TokenType.L_BRACE: 6424 this = self.expression( 6425 exp.Struct, 6426 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6427 ) 6428 elif not this: 6429 this = build_array_constructor( 6430 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6431 ) 6432 else: 6433 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6434 if constructor_type: 6435 return build_array_constructor( 6436 constructor_type, 6437 args=expressions, 6438 bracket_kind=bracket_kind, 6439 dialect=self.dialect, 6440 ) 6441 6442 expressions = apply_index_offset( 6443 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6444 ) 6445 this = self.expression( 6446 exp.Bracket, 6447 this=this, 6448 expressions=expressions, 6449 comments=this.pop_comments(), 6450 ) 6451 6452 self._add_comments(this) 6453 return self._parse_bracket(this) 6454 6455 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6456 if self._match(TokenType.COLON): 6457 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6458 return this 6459 6460 def _parse_case(self) -> t.Optional[exp.Expression]: 6461 ifs = [] 6462 default = None 6463 6464 comments = self._prev_comments 6465 expression = self._parse_assignment() 6466 6467 while self._match(TokenType.WHEN): 6468 this = self._parse_assignment() 6469 self._match(TokenType.THEN) 6470 then = self._parse_assignment() 6471 ifs.append(self.expression(exp.If, this=this, true=then)) 6472 6473 if self._match(TokenType.ELSE): 6474 default = self._parse_assignment() 6475 6476 if not self._match(TokenType.END): 6477 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6478 default = exp.column("interval") 6479 else: 6480 self.raise_error("Expected END after CASE", self._prev) 6481 6482 return self.expression( 6483 exp.Case, comments=comments, this=expression, ifs=ifs, 
default=default 6484 ) 6485 6486 def _parse_if(self) -> t.Optional[exp.Expression]: 6487 if self._match(TokenType.L_PAREN): 6488 args = self._parse_csv( 6489 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6490 ) 6491 this = self.validate_expression(exp.If.from_arg_list(args), args) 6492 self._match_r_paren() 6493 else: 6494 index = self._index - 1 6495 6496 if self.NO_PAREN_IF_COMMANDS and index == 0: 6497 return self._parse_as_command(self._prev) 6498 6499 condition = self._parse_assignment() 6500 6501 if not condition: 6502 self._retreat(index) 6503 return None 6504 6505 self._match(TokenType.THEN) 6506 true = self._parse_assignment() 6507 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6508 self._match(TokenType.END) 6509 this = self.expression(exp.If, this=condition, true=true, false=false) 6510 6511 return this 6512 6513 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6514 if not self._match_text_seq("VALUE", "FOR"): 6515 self._retreat(self._index - 1) 6516 return None 6517 6518 return self.expression( 6519 exp.NextValueFor, 6520 this=self._parse_column(), 6521 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6522 ) 6523 6524 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6525 this = self._parse_function() or self._parse_var_or_string(upper=True) 6526 6527 if self._match(TokenType.FROM): 6528 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6529 6530 if not self._match(TokenType.COMMA): 6531 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6532 6533 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6534 6535 def _parse_gap_fill(self) -> exp.GapFill: 6536 self._match(TokenType.TABLE) 6537 this = self._parse_table() 6538 6539 self._match(TokenType.COMMA) 6540 args = [this, *self._parse_csv(self._parse_lambda)] 6541 6542 gap_fill = exp.GapFill.from_arg_list(args) 6543 return self.validate_expression(gap_fill, args) 6544 6545 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6546 this = self._parse_assignment() 6547 6548 if not self._match(TokenType.ALIAS): 6549 if self._match(TokenType.COMMA): 6550 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6551 6552 self.raise_error("Expected AS after CAST") 6553 6554 fmt = None 6555 to = self._parse_types() 6556 6557 default = self._match(TokenType.DEFAULT) 6558 if default: 6559 default = self._parse_bitwise() 6560 self._match_text_seq("ON", "CONVERSION", "ERROR") 6561 6562 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6563 fmt_string = self._parse_string() 6564 fmt = self._parse_at_time_zone(fmt_string) 6565 6566 if not to: 6567 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6568 if to.this in exp.DataType.TEMPORAL_TYPES: 6569 this = self.expression( 6570 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6571 this=this, 6572 format=exp.Literal.string( 6573 format_time( 6574 fmt_string.this if fmt_string else "", 6575 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6576 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6577 ) 6578 ), 6579 safe=safe, 6580 ) 6581 6582 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6583 this.set("zone", fmt.args["zone"]) 6584 return this 6585 elif not to: 6586 self.raise_error("Expected TYPE after CAST") 6587 elif isinstance(to, exp.Identifier): 6588 to = exp.DataType.build(to.name, 
dialect=self.dialect, udt=True) 6589 elif to.this == exp.DataType.Type.CHAR: 6590 if self._match(TokenType.CHARACTER_SET): 6591 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6592 6593 return self.build_cast( 6594 strict=strict, 6595 this=this, 6596 to=to, 6597 format=fmt, 6598 safe=safe, 6599 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6600 default=default, 6601 ) 6602 6603 def _parse_string_agg(self) -> exp.GroupConcat: 6604 if self._match(TokenType.DISTINCT): 6605 args: t.List[t.Optional[exp.Expression]] = [ 6606 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6607 ] 6608 if self._match(TokenType.COMMA): 6609 args.extend(self._parse_csv(self._parse_assignment)) 6610 else: 6611 args = self._parse_csv(self._parse_assignment) # type: ignore 6612 6613 if self._match_text_seq("ON", "OVERFLOW"): 6614 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6615 if self._match_text_seq("ERROR"): 6616 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6617 else: 6618 self._match_text_seq("TRUNCATE") 6619 on_overflow = self.expression( 6620 exp.OverflowTruncateBehavior, 6621 this=self._parse_string(), 6622 with_count=( 6623 self._match_text_seq("WITH", "COUNT") 6624 or not self._match_text_seq("WITHOUT", "COUNT") 6625 ), 6626 ) 6627 else: 6628 on_overflow = None 6629 6630 index = self._index 6631 if not self._match(TokenType.R_PAREN) and args: 6632 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6633 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6634 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6635 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6636 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6637 6638 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6639 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6640 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
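# A minimal illustration of why the manual WITHIN GROUP handling above helps transpilation
# (dialect names are assumptions for the example; the output shown is indicative):
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT STRING_AGG(x, ', ') FROM t", read="postgres", write="mysql")
#
# is expected to produce roughly ["SELECT GROUP_CONCAT(x SEPARATOR ', ') FROM t"], since the
# exp.GroupConcat built here maps onto MySQL's GROUP_CONCAT rather than a WithinGroup node.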
6641 if not self._match_text_seq("WITHIN", "GROUP"): 6642 self._retreat(index) 6643 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6644 6645 # The corresponding match_r_paren will be called in parse_function (caller) 6646 self._match_l_paren() 6647 6648 return self.expression( 6649 exp.GroupConcat, 6650 this=self._parse_order(this=seq_get(args, 0)), 6651 separator=seq_get(args, 1), 6652 on_overflow=on_overflow, 6653 ) 6654 6655 def _parse_convert( 6656 self, strict: bool, safe: t.Optional[bool] = None 6657 ) -> t.Optional[exp.Expression]: 6658 this = self._parse_bitwise() 6659 6660 if self._match(TokenType.USING): 6661 to: t.Optional[exp.Expression] = self.expression( 6662 exp.CharacterSet, this=self._parse_var() 6663 ) 6664 elif self._match(TokenType.COMMA): 6665 to = self._parse_types() 6666 else: 6667 to = None 6668 6669 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6670 6671 def _parse_xml_table(self) -> exp.XMLTable: 6672 namespaces = None 6673 passing = None 6674 columns = None 6675 6676 if self._match_text_seq("XMLNAMESPACES", "("): 6677 namespaces = self._parse_xml_namespace() 6678 self._match_text_seq(")", ",") 6679 6680 this = self._parse_string() 6681 6682 if self._match_text_seq("PASSING"): 6683 # The BY VALUE keywords are optional and are provided for semantic clarity 6684 self._match_text_seq("BY", "VALUE") 6685 passing = self._parse_csv(self._parse_column) 6686 6687 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6688 6689 if self._match_text_seq("COLUMNS"): 6690 columns = self._parse_csv(self._parse_field_def) 6691 6692 return self.expression( 6693 exp.XMLTable, 6694 this=this, 6695 namespaces=namespaces, 6696 passing=passing, 6697 columns=columns, 6698 by_ref=by_ref, 6699 ) 6700 6701 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6702 namespaces = [] 6703 6704 while True: 6705 if self._match(TokenType.DEFAULT): 6706 uri = self._parse_string() 6707 else: 6708 uri = self._parse_alias(self._parse_string()) 6709 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6710 if not self._match(TokenType.COMMA): 6711 break 6712 6713 return namespaces 6714 6715 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6716 args = self._parse_csv(self._parse_assignment) 6717 6718 if len(args) < 3: 6719 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6720 6721 return self.expression(exp.DecodeCase, expressions=args) 6722 6723 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6724 self._match_text_seq("KEY") 6725 key = self._parse_column() 6726 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6727 self._match_text_seq("VALUE") 6728 value = self._parse_bitwise() 6729 6730 if not key and not value: 6731 return None 6732 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6733 6734 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6735 if not this or not self._match_text_seq("FORMAT", "JSON"): 6736 return this 6737 6738 return self.expression(exp.FormatJson, this=this) 6739 6740 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6741 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6742 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6743 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6744 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6745 else: 6746 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6747 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6748 6749 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6750 6751 if not empty and not error and not null: 6752 return None 6753 6754 return self.expression( 6755 exp.OnCondition, 6756 empty=empty, 6757 error=error, 6758 null=null, 6759 ) 6760 6761 def _parse_on_handling( 6762 self, on: str, *values: str 6763 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6764 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6765 for value in values: 6766 if self._match_text_seq(value, "ON", on): 6767 return f"{value} ON {on}" 6768 6769 index = self._index 6770 if self._match(TokenType.DEFAULT): 6771 default_value = self._parse_bitwise() 6772 if self._match_text_seq("ON", on): 6773 return default_value 6774 6775 self._retreat(index) 6776 6777 return None 6778 6779 @t.overload 6780 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6781 6782 @t.overload 6783 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6784 6785 def _parse_json_object(self, agg=False): 6786 star = self._parse_star() 6787 expressions = ( 6788 [star] 6789 if star 6790 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6791 ) 6792 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6793 6794 unique_keys = None 6795 if self._match_text_seq("WITH", "UNIQUE"): 6796 unique_keys = True 6797 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6798 unique_keys = False 6799 6800 self._match_text_seq("KEYS") 6801 6802 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6803 self._parse_type() 6804 ) 6805 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6806 6807 return self.expression( 6808 exp.JSONObjectAgg if agg else exp.JSONObject, 6809 expressions=expressions, 6810 null_handling=null_handling, 6811 unique_keys=unique_keys, 6812 return_type=return_type, 6813 encoding=encoding, 6814 ) 6815 6816 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6817 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6818 if not self._match_text_seq("NESTED"): 6819 this = self._parse_id_var() 6820 kind = self._parse_types(allow_identifiers=False) 6821 nested = None 6822 else: 6823 this = None 6824 kind = None 6825 nested = True 6826 6827 path = self._match_text_seq("PATH") and self._parse_string() 6828 nested_schema = nested and self._parse_json_schema() 6829 6830 return self.expression( 6831 exp.JSONColumnDef, 6832 this=this, 6833 kind=kind, 6834 path=path, 6835 nested_schema=nested_schema, 6836 ) 6837 6838 def _parse_json_schema(self) -> exp.JSONSchema: 6839 self._match_text_seq("COLUMNS") 6840 return self.expression( 6841 exp.JSONSchema, 6842 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6843 ) 6844 6845 def _parse_json_table(self) -> exp.JSONTable: 6846 this = self._parse_format_json(self._parse_bitwise()) 6847 path = self._match(TokenType.COMMA) and self._parse_string() 6848 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6849 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6850 schema = 
self._parse_json_schema() 6851 6852 return exp.JSONTable( 6853 this=this, 6854 schema=schema, 6855 path=path, 6856 error_handling=error_handling, 6857 empty_handling=empty_handling, 6858 ) 6859 6860 def _parse_match_against(self) -> exp.MatchAgainst: 6861 expressions = self._parse_csv(self._parse_column) 6862 6863 self._match_text_seq(")", "AGAINST", "(") 6864 6865 this = self._parse_string() 6866 6867 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6868 modifier = "IN NATURAL LANGUAGE MODE" 6869 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6870 modifier = f"{modifier} WITH QUERY EXPANSION" 6871 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6872 modifier = "IN BOOLEAN MODE" 6873 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6874 modifier = "WITH QUERY EXPANSION" 6875 else: 6876 modifier = None 6877 6878 return self.expression( 6879 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6880 ) 6881 6882 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6883 def _parse_open_json(self) -> exp.OpenJSON: 6884 this = self._parse_bitwise() 6885 path = self._match(TokenType.COMMA) and self._parse_string() 6886 6887 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6888 this = self._parse_field(any_token=True) 6889 kind = self._parse_types() 6890 path = self._parse_string() 6891 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6892 6893 return self.expression( 6894 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6895 ) 6896 6897 expressions = None 6898 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6899 self._match_l_paren() 6900 expressions = self._parse_csv(_parse_open_json_column_def) 6901 6902 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6903 6904 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6905 args = self._parse_csv(self._parse_bitwise) 6906 6907 if self._match(TokenType.IN): 6908 return self.expression( 6909 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6910 ) 6911 6912 if haystack_first: 6913 haystack = seq_get(args, 0) 6914 needle = seq_get(args, 1) 6915 else: 6916 haystack = seq_get(args, 1) 6917 needle = seq_get(args, 0) 6918 6919 return self.expression( 6920 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6921 ) 6922 6923 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6924 args = self._parse_csv(self._parse_table) 6925 return exp.JoinHint(this=func_name.upper(), expressions=args) 6926 6927 def _parse_substring(self) -> exp.Substring: 6928 # Postgres supports the form: substring(string [from int] [for int]) 6929 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6930 6931 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6932 6933 if self._match(TokenType.FROM): 6934 args.append(self._parse_bitwise()) 6935 if self._match(TokenType.FOR): 6936 if len(args) == 1: 6937 args.append(exp.Literal.number(1)) 6938 args.append(self._parse_bitwise()) 6939 6940 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6941 6942 def _parse_trim(self) -> exp.Trim: 6943 # https://www.w3resource.com/sql/character-functions/trim.php 6944 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6945 6946 position = None 6947 collation = None 6948 expression = None 6949 6950 if self._match_texts(self.TRIM_TYPES): 6951 position = 
self._prev.text.upper() 6952 6953 this = self._parse_bitwise() 6954 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6955 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6956 expression = self._parse_bitwise() 6957 6958 if invert_order: 6959 this, expression = expression, this 6960 6961 if self._match(TokenType.COLLATE): 6962 collation = self._parse_bitwise() 6963 6964 return self.expression( 6965 exp.Trim, this=this, position=position, expression=expression, collation=collation 6966 ) 6967 6968 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6969 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6970 6971 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6972 return self._parse_window(self._parse_id_var(), alias=True) 6973 6974 def _parse_respect_or_ignore_nulls( 6975 self, this: t.Optional[exp.Expression] 6976 ) -> t.Optional[exp.Expression]: 6977 if self._match_text_seq("IGNORE", "NULLS"): 6978 return self.expression(exp.IgnoreNulls, this=this) 6979 if self._match_text_seq("RESPECT", "NULLS"): 6980 return self.expression(exp.RespectNulls, this=this) 6981 return this 6982 6983 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6984 if self._match(TokenType.HAVING): 6985 self._match_texts(("MAX", "MIN")) 6986 max = self._prev.text.upper() != "MIN" 6987 return self.expression( 6988 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6989 ) 6990 6991 return this 6992 6993 def _parse_window( 6994 self, this: t.Optional[exp.Expression], alias: bool = False 6995 ) -> t.Optional[exp.Expression]: 6996 func = this 6997 comments = func.comments if isinstance(func, exp.Expression) else None 6998 6999 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7000 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7001 if self._match_text_seq("WITHIN", "GROUP"): 7002 order = self._parse_wrapped(self._parse_order) 7003 this = self.expression(exp.WithinGroup, this=this, expression=order) 7004 7005 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7006 self._match(TokenType.WHERE) 7007 this = self.expression( 7008 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7009 ) 7010 self._match_r_paren() 7011 7012 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7013 # Some dialects choose to implement and some do not. 7014 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7015 7016 # There is some code above in _parse_lambda that handles 7017 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7018 7019 # The below changes handle 7020 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 7021 7022 # Oracle allows both formats 7023 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7024 # and Snowflake chose to do the same for familiarity 7025 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7026 if isinstance(this, exp.AggFunc): 7027 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7028 7029 if ignore_respect and ignore_respect is not this: 7030 ignore_respect.replace(ignore_respect.this) 7031 this = self.expression(ignore_respect.__class__, this=this) 7032 7033 this = self._parse_respect_or_ignore_nulls(this) 7034 7035 # bigquery select from window x AS (partition by ...) 
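# --- Illustrative sketch, not part of the parser source ---
# The branch below handles the named-window (aliased) form, e.g. BigQuery/standard SQL:
#   SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y ORDER BY z)
# It is reached via _parse_window_clause -> _parse_named_window -> _parse_window(alias=True)
# and yields an exp.Window whose "alias" is the identifier w. A hedged usage example,
# assuming only the public sqlglot.parse_one API (kept as comments so the listing stays valid):
#   import sqlglot
#   from sqlglot import exp
#   tree = sqlglot.parse_one("SELECT SUM(x) OVER w FROM t WINDOW w AS (ORDER BY y)")
#   assert tree.find(exp.Window) is not None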
7036 if alias: 7037 over = None 7038 self._match(TokenType.ALIAS) 7039 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7040 return this 7041 else: 7042 over = self._prev.text.upper() 7043 7044 if comments and isinstance(func, exp.Expression): 7045 func.pop_comments() 7046 7047 if not self._match(TokenType.L_PAREN): 7048 return self.expression( 7049 exp.Window, 7050 comments=comments, 7051 this=this, 7052 alias=self._parse_id_var(False), 7053 over=over, 7054 ) 7055 7056 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7057 7058 first = self._match(TokenType.FIRST) 7059 if self._match_text_seq("LAST"): 7060 first = False 7061 7062 partition, order = self._parse_partition_and_order() 7063 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7064 7065 if kind: 7066 self._match(TokenType.BETWEEN) 7067 start = self._parse_window_spec() 7068 self._match(TokenType.AND) 7069 end = self._parse_window_spec() 7070 exclude = ( 7071 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7072 if self._match_text_seq("EXCLUDE") 7073 else None 7074 ) 7075 7076 spec = self.expression( 7077 exp.WindowSpec, 7078 kind=kind, 7079 start=start["value"], 7080 start_side=start["side"], 7081 end=end["value"], 7082 end_side=end["side"], 7083 exclude=exclude, 7084 ) 7085 else: 7086 spec = None 7087 7088 self._match_r_paren() 7089 7090 window = self.expression( 7091 exp.Window, 7092 comments=comments, 7093 this=this, 7094 partition_by=partition, 7095 order=order, 7096 spec=spec, 7097 alias=window_alias, 7098 over=over, 7099 first=first, 7100 ) 7101 7102 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 7103 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7104 return self._parse_window(window, alias=alias) 7105 7106 return window 7107 7108 def _parse_partition_and_order( 7109 self, 7110 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7111 return self._parse_partition_by(), self._parse_order() 7112 7113 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7114 self._match(TokenType.BETWEEN) 7115 7116 return { 7117 "value": ( 7118 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7119 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7120 or self._parse_bitwise() 7121 ), 7122 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7123 } 7124 7125 def _parse_alias( 7126 self, this: t.Optional[exp.Expression], explicit: bool = False 7127 ) -> t.Optional[exp.Expression]: 7128 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7129 # so this section tries to parse the clause version and if it fails, it treats the token 7130 # as an identifier (alias) 7131 if self._can_parse_limit_or_offset(): 7132 return this 7133 7134 any_token = self._match(TokenType.ALIAS) 7135 comments = self._prev_comments or [] 7136 7137 if explicit and not any_token: 7138 return this 7139 7140 if self._match(TokenType.L_PAREN): 7141 aliases = self.expression( 7142 exp.Aliases, 7143 comments=comments, 7144 this=this, 7145 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7146 ) 7147 self._match_r_paren(aliases) 7148 return aliases 7149 7150 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7151 self.STRING_ALIASES and self._parse_string_as_identifier() 7152 ) 7153 7154 if alias: 7155 comments.extend(alias.pop_comments()) 7156 this = self.expression(exp.Alias, comments=comments, this=this, 
alias=alias) 7157 column = this.this 7158 7159 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7160 if not this.comments and column and column.comments: 7161 this.comments = column.pop_comments() 7162 7163 return this 7164 7165 def _parse_id_var( 7166 self, 7167 any_token: bool = True, 7168 tokens: t.Optional[t.Collection[TokenType]] = None, 7169 ) -> t.Optional[exp.Expression]: 7170 expression = self._parse_identifier() 7171 if not expression and ( 7172 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7173 ): 7174 quoted = self._prev.token_type == TokenType.STRING 7175 expression = self._identifier_expression(quoted=quoted) 7176 7177 return expression 7178 7179 def _parse_string(self) -> t.Optional[exp.Expression]: 7180 if self._match_set(self.STRING_PARSERS): 7181 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7182 return self._parse_placeholder() 7183 7184 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7185 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7186 if output: 7187 output.update_positions(self._prev) 7188 return output 7189 7190 def _parse_number(self) -> t.Optional[exp.Expression]: 7191 if self._match_set(self.NUMERIC_PARSERS): 7192 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7193 return self._parse_placeholder() 7194 7195 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7196 if self._match(TokenType.IDENTIFIER): 7197 return self._identifier_expression(quoted=True) 7198 return self._parse_placeholder() 7199 7200 def _parse_var( 7201 self, 7202 any_token: bool = False, 7203 tokens: t.Optional[t.Collection[TokenType]] = None, 7204 upper: bool = False, 7205 ) -> t.Optional[exp.Expression]: 7206 if ( 7207 (any_token and self._advance_any()) 7208 or self._match(TokenType.VAR) 7209 or (self._match_set(tokens) if tokens else False) 7210 ): 7211 return self.expression( 7212 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7213 ) 7214 return self._parse_placeholder() 7215 7216 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7217 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7218 self._advance() 7219 return self._prev 7220 return None 7221 7222 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7223 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7224 7225 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7226 return self._parse_primary() or self._parse_var(any_token=True) 7227 7228 def _parse_null(self) -> t.Optional[exp.Expression]: 7229 if self._match_set(self.NULL_TOKENS): 7230 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7231 return self._parse_placeholder() 7232 7233 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7234 if self._match(TokenType.TRUE): 7235 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7236 if self._match(TokenType.FALSE): 7237 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7238 return self._parse_placeholder() 7239 7240 def _parse_star(self) -> t.Optional[exp.Expression]: 7241 if self._match(TokenType.STAR): 7242 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7243 return self._parse_placeholder() 7244 7245 def _parse_parameter(self) -> exp.Parameter: 7246 this = self._parse_identifier() or self._parse_primary_or_var() 7247 return 
self.expression(exp.Parameter, this=this) 7248 7249 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7250 if self._match_set(self.PLACEHOLDER_PARSERS): 7251 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7252 if placeholder: 7253 return placeholder 7254 self._advance(-1) 7255 return None 7256 7257 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7258 if not self._match_texts(keywords): 7259 return None 7260 if self._match(TokenType.L_PAREN, advance=False): 7261 return self._parse_wrapped_csv(self._parse_expression) 7262 7263 expression = self._parse_expression() 7264 return [expression] if expression else None 7265 7266 def _parse_csv( 7267 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7268 ) -> t.List[exp.Expression]: 7269 parse_result = parse_method() 7270 items = [parse_result] if parse_result is not None else [] 7271 7272 while self._match(sep): 7273 self._add_comments(parse_result) 7274 parse_result = parse_method() 7275 if parse_result is not None: 7276 items.append(parse_result) 7277 7278 return items 7279 7280 def _parse_tokens( 7281 self, parse_method: t.Callable, expressions: t.Dict 7282 ) -> t.Optional[exp.Expression]: 7283 this = parse_method() 7284 7285 while self._match_set(expressions): 7286 this = self.expression( 7287 expressions[self._prev.token_type], 7288 this=this, 7289 comments=self._prev_comments, 7290 expression=parse_method(), 7291 ) 7292 7293 return this 7294 7295 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7296 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7297 7298 def _parse_wrapped_csv( 7299 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7300 ) -> t.List[exp.Expression]: 7301 return self._parse_wrapped( 7302 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7303 ) 7304 7305 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7306 wrapped = self._match(TokenType.L_PAREN) 7307 if not wrapped and not optional: 7308 self.raise_error("Expecting (") 7309 parse_result = parse_method() 7310 if wrapped: 7311 self._match_r_paren() 7312 return parse_result 7313 7314 def _parse_expressions(self) -> t.List[exp.Expression]: 7315 return self._parse_csv(self._parse_expression) 7316 7317 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7318 return self._parse_select() or self._parse_set_operations( 7319 self._parse_alias(self._parse_assignment(), explicit=True) 7320 if alias 7321 else self._parse_assignment() 7322 ) 7323 7324 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7325 return self._parse_query_modifiers( 7326 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7327 ) 7328 7329 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7330 this = None 7331 if self._match_texts(self.TRANSACTION_KIND): 7332 this = self._prev.text 7333 7334 self._match_texts(("TRANSACTION", "WORK")) 7335 7336 modes = [] 7337 while True: 7338 mode = [] 7339 while self._match(TokenType.VAR): 7340 mode.append(self._prev.text) 7341 7342 if mode: 7343 modes.append(" ".join(mode)) 7344 if not self._match(TokenType.COMMA): 7345 break 7346 7347 return self.expression(exp.Transaction, this=this, modes=modes) 7348 7349 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7350 chain = None 7351 savepoint = None 7352 is_rollback = self._prev.token_type == 
TokenType.ROLLBACK 7353 7354 self._match_texts(("TRANSACTION", "WORK")) 7355 7356 if self._match_text_seq("TO"): 7357 self._match_text_seq("SAVEPOINT") 7358 savepoint = self._parse_id_var() 7359 7360 if self._match(TokenType.AND): 7361 chain = not self._match_text_seq("NO") 7362 self._match_text_seq("CHAIN") 7363 7364 if is_rollback: 7365 return self.expression(exp.Rollback, savepoint=savepoint) 7366 7367 return self.expression(exp.Commit, chain=chain) 7368 7369 def _parse_refresh(self) -> exp.Refresh: 7370 self._match(TokenType.TABLE) 7371 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7372 7373 def _parse_column_def_with_exists(self): 7374 start = self._index 7375 self._match(TokenType.COLUMN) 7376 7377 exists_column = self._parse_exists(not_=True) 7378 expression = self._parse_field_def() 7379 7380 if not isinstance(expression, exp.ColumnDef): 7381 self._retreat(start) 7382 return None 7383 7384 expression.set("exists", exists_column) 7385 7386 return expression 7387 7388 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7389 if not self._prev.text.upper() == "ADD": 7390 return None 7391 7392 expression = self._parse_column_def_with_exists() 7393 if not expression: 7394 return None 7395 7396 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7397 if self._match_texts(("FIRST", "AFTER")): 7398 position = self._prev.text 7399 column_position = self.expression( 7400 exp.ColumnPosition, this=self._parse_column(), position=position 7401 ) 7402 expression.set("position", column_position) 7403 7404 return expression 7405 7406 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7407 drop = self._match(TokenType.DROP) and self._parse_drop() 7408 if drop and not isinstance(drop, exp.Command): 7409 drop.set("kind", drop.args.get("kind", "COLUMN")) 7410 return drop 7411 7412 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7413 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7414 return self.expression( 7415 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7416 ) 7417 7418 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7419 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7420 self._match_text_seq("ADD") 7421 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7422 return self.expression( 7423 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7424 ) 7425 7426 column_def = self._parse_add_column() 7427 if isinstance(column_def, exp.ColumnDef): 7428 return column_def 7429 7430 exists = self._parse_exists(not_=True) 7431 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7432 return self.expression( 7433 exp.AddPartition, 7434 exists=exists, 7435 this=self._parse_field(any_token=True), 7436 location=self._match_text_seq("LOCATION", advance=False) 7437 and self._parse_property(), 7438 ) 7439 7440 return None 7441 7442 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7443 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7444 or self._match_text_seq("COLUMNS") 7445 ): 7446 schema = self._parse_schema() 7447 7448 return ( 7449 ensure_list(schema) 7450 if schema 7451 else self._parse_csv(self._parse_column_def_with_exists) 7452 ) 7453 7454 return self._parse_csv(_parse_add_alteration) 7455 7456 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7457 if 
self._match_texts(self.ALTER_ALTER_PARSERS): 7458 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7459 7460 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7461 # keyword after ALTER we default to parsing this statement 7462 self._match(TokenType.COLUMN) 7463 column = self._parse_field(any_token=True) 7464 7465 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7466 return self.expression(exp.AlterColumn, this=column, drop=True) 7467 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7468 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7469 if self._match(TokenType.COMMENT): 7470 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7471 if self._match_text_seq("DROP", "NOT", "NULL"): 7472 return self.expression( 7473 exp.AlterColumn, 7474 this=column, 7475 drop=True, 7476 allow_null=True, 7477 ) 7478 if self._match_text_seq("SET", "NOT", "NULL"): 7479 return self.expression( 7480 exp.AlterColumn, 7481 this=column, 7482 allow_null=False, 7483 ) 7484 7485 if self._match_text_seq("SET", "VISIBLE"): 7486 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7487 if self._match_text_seq("SET", "INVISIBLE"): 7488 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7489 7490 self._match_text_seq("SET", "DATA") 7491 self._match_text_seq("TYPE") 7492 return self.expression( 7493 exp.AlterColumn, 7494 this=column, 7495 dtype=self._parse_types(), 7496 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7497 using=self._match(TokenType.USING) and self._parse_assignment(), 7498 ) 7499 7500 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7501 if self._match_texts(("ALL", "EVEN", "AUTO")): 7502 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7503 7504 self._match_text_seq("KEY", "DISTKEY") 7505 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7506 7507 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7508 if compound: 7509 self._match_text_seq("SORTKEY") 7510 7511 if self._match(TokenType.L_PAREN, advance=False): 7512 return self.expression( 7513 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7514 ) 7515 7516 self._match_texts(("AUTO", "NONE")) 7517 return self.expression( 7518 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7519 ) 7520 7521 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7522 index = self._index - 1 7523 7524 partition_exists = self._parse_exists() 7525 if self._match(TokenType.PARTITION, advance=False): 7526 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7527 7528 self._retreat(index) 7529 return self._parse_csv(self._parse_drop_column) 7530 7531 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7532 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7533 exists = self._parse_exists() 7534 old_column = self._parse_column() 7535 to = self._match_text_seq("TO") 7536 new_column = self._parse_column() 7537 7538 if old_column is None or to is None or new_column is None: 7539 return None 7540 7541 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7542 7543 self._match_text_seq("TO") 7544 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7545 7546 def _parse_alter_table_set(self) -> exp.AlterSet: 7547 
alter_set = self.expression(exp.AlterSet) 7548 7549 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7550 "TABLE", "PROPERTIES" 7551 ): 7552 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7553 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7554 alter_set.set("expressions", [self._parse_assignment()]) 7555 elif self._match_texts(("LOGGED", "UNLOGGED")): 7556 alter_set.set("option", exp.var(self._prev.text.upper())) 7557 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7558 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7559 elif self._match_text_seq("LOCATION"): 7560 alter_set.set("location", self._parse_field()) 7561 elif self._match_text_seq("ACCESS", "METHOD"): 7562 alter_set.set("access_method", self._parse_field()) 7563 elif self._match_text_seq("TABLESPACE"): 7564 alter_set.set("tablespace", self._parse_field()) 7565 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7566 alter_set.set("file_format", [self._parse_field()]) 7567 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7568 alter_set.set("file_format", self._parse_wrapped_options()) 7569 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7570 alter_set.set("copy_options", self._parse_wrapped_options()) 7571 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7572 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7573 else: 7574 if self._match_text_seq("SERDE"): 7575 alter_set.set("serde", self._parse_field()) 7576 7577 properties = self._parse_wrapped(self._parse_properties, optional=True) 7578 alter_set.set("expressions", [properties]) 7579 7580 return alter_set 7581 7582 def _parse_alter(self) -> exp.Alter | exp.Command: 7583 start = self._prev 7584 7585 alter_token = self._match_set(self.ALTERABLES) and self._prev 7586 if not alter_token: 7587 return self._parse_as_command(start) 7588 7589 exists = self._parse_exists() 7590 only = self._match_text_seq("ONLY") 7591 this = self._parse_table(schema=True) 7592 check = self._match_text_seq("WITH", "CHECK") 7593 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7594 7595 if self._next: 7596 self._advance() 7597 7598 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7599 if parser: 7600 actions = ensure_list(parser(self)) 7601 not_valid = self._match_text_seq("NOT", "VALID") 7602 options = self._parse_csv(self._parse_property) 7603 7604 if not self._curr and actions: 7605 return self.expression( 7606 exp.Alter, 7607 this=this, 7608 kind=alter_token.text.upper(), 7609 exists=exists, 7610 actions=actions, 7611 only=only, 7612 options=options, 7613 cluster=cluster, 7614 not_valid=not_valid, 7615 check=check, 7616 ) 7617 7618 return self._parse_as_command(start) 7619 7620 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7621 start = self._prev 7622 # https://duckdb.org/docs/sql/statements/analyze 7623 if not self._curr: 7624 return self.expression(exp.Analyze) 7625 7626 options = [] 7627 while self._match_texts(self.ANALYZE_STYLES): 7628 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7629 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7630 else: 7631 options.append(self._prev.text.upper()) 7632 7633 this: t.Optional[exp.Expression] = None 7634 inner_expression: t.Optional[exp.Expression] = None 7635 7636 kind = self._curr and self._curr.text.upper() 7637 7638 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7639 
this = self._parse_table_parts() 7640 elif self._match_text_seq("TABLES"): 7641 if self._match_set((TokenType.FROM, TokenType.IN)): 7642 kind = f"{kind} {self._prev.text.upper()}" 7643 this = self._parse_table(schema=True, is_db_reference=True) 7644 elif self._match_text_seq("DATABASE"): 7645 this = self._parse_table(schema=True, is_db_reference=True) 7646 elif self._match_text_seq("CLUSTER"): 7647 this = self._parse_table() 7648 # Try matching inner expr keywords before fallback to parse table. 7649 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7650 kind = None 7651 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7652 else: 7653 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7654 kind = None 7655 this = self._parse_table_parts() 7656 7657 partition = self._try_parse(self._parse_partition) 7658 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7659 return self._parse_as_command(start) 7660 7661 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7662 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7663 "WITH", "ASYNC", "MODE" 7664 ): 7665 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7666 else: 7667 mode = None 7668 7669 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7670 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7671 7672 properties = self._parse_properties() 7673 return self.expression( 7674 exp.Analyze, 7675 kind=kind, 7676 this=this, 7677 mode=mode, 7678 partition=partition, 7679 properties=properties, 7680 expression=inner_expression, 7681 options=options, 7682 ) 7683 7684 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7685 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7686 this = None 7687 kind = self._prev.text.upper() 7688 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7689 expressions = [] 7690 7691 if not self._match_text_seq("STATISTICS"): 7692 self.raise_error("Expecting token STATISTICS") 7693 7694 if self._match_text_seq("NOSCAN"): 7695 this = "NOSCAN" 7696 elif self._match(TokenType.FOR): 7697 if self._match_text_seq("ALL", "COLUMNS"): 7698 this = "FOR ALL COLUMNS" 7699 if self._match_texts("COLUMNS"): 7700 this = "FOR COLUMNS" 7701 expressions = self._parse_csv(self._parse_column_reference) 7702 elif self._match_text_seq("SAMPLE"): 7703 sample = self._parse_number() 7704 expressions = [ 7705 self.expression( 7706 exp.AnalyzeSample, 7707 sample=sample, 7708 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7709 ) 7710 ] 7711 7712 return self.expression( 7713 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7714 ) 7715 7716 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7717 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7718 kind = None 7719 this = None 7720 expression: t.Optional[exp.Expression] = None 7721 if self._match_text_seq("REF", "UPDATE"): 7722 kind = "REF" 7723 this = "UPDATE" 7724 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7725 this = "UPDATE SET DANGLING TO NULL" 7726 elif self._match_text_seq("STRUCTURE"): 7727 kind = "STRUCTURE" 7728 if self._match_text_seq("CASCADE", "FAST"): 7729 this = "CASCADE FAST" 7730 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7731 ("ONLINE", "OFFLINE") 7732 ): 7733 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 
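# --- Illustrative sketch, not part of the parser source ---
# An Oracle-style statement exercising the VALIDATE STRUCTURE branch above (a hedged
# example, assuming only the public sqlglot.parse_one API; kept as comments so the
# listing stays valid):
#   import sqlglot
#   tree = sqlglot.parse_one("ANALYZE TABLE t VALIDATE STRUCTURE CASCADE FAST", read="oracle")
#   # expected: an exp.Analyze whose inner expression is an exp.AnalyzeValidate with
#   # kind="STRUCTURE" and this="CASCADE FAST"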
7734 expression = self._parse_into() 7735 7736 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7737 7738 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7739 this = self._prev.text.upper() 7740 if self._match_text_seq("COLUMNS"): 7741 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7742 return None 7743 7744 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7745 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7746 if self._match_text_seq("STATISTICS"): 7747 return self.expression(exp.AnalyzeDelete, kind=kind) 7748 return None 7749 7750 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7751 if self._match_text_seq("CHAINED", "ROWS"): 7752 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7753 return None 7754 7755 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7756 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7757 this = self._prev.text.upper() 7758 expression: t.Optional[exp.Expression] = None 7759 expressions = [] 7760 update_options = None 7761 7762 if self._match_text_seq("HISTOGRAM", "ON"): 7763 expressions = self._parse_csv(self._parse_column_reference) 7764 with_expressions = [] 7765 while self._match(TokenType.WITH): 7766 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7767 if self._match_texts(("SYNC", "ASYNC")): 7768 if self._match_text_seq("MODE", advance=False): 7769 with_expressions.append(f"{self._prev.text.upper()} MODE") 7770 self._advance() 7771 else: 7772 buckets = self._parse_number() 7773 if self._match_text_seq("BUCKETS"): 7774 with_expressions.append(f"{buckets} BUCKETS") 7775 if with_expressions: 7776 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7777 7778 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7779 TokenType.UPDATE, advance=False 7780 ): 7781 update_options = self._prev.text.upper() 7782 self._advance() 7783 elif self._match_text_seq("USING", "DATA"): 7784 expression = self.expression(exp.UsingData, this=self._parse_string()) 7785 7786 return self.expression( 7787 exp.AnalyzeHistogram, 7788 this=this, 7789 expressions=expressions, 7790 expression=expression, 7791 update_options=update_options, 7792 ) 7793 7794 def _parse_merge(self) -> exp.Merge: 7795 self._match(TokenType.INTO) 7796 target = self._parse_table() 7797 7798 if target and self._match(TokenType.ALIAS, advance=False): 7799 target.set("alias", self._parse_table_alias()) 7800 7801 self._match(TokenType.USING) 7802 using = self._parse_table() 7803 7804 self._match(TokenType.ON) 7805 on = self._parse_assignment() 7806 7807 return self.expression( 7808 exp.Merge, 7809 this=target, 7810 using=using, 7811 on=on, 7812 whens=self._parse_when_matched(), 7813 returning=self._parse_returning(), 7814 ) 7815 7816 def _parse_when_matched(self) -> exp.Whens: 7817 whens = [] 7818 7819 while self._match(TokenType.WHEN): 7820 matched = not self._match(TokenType.NOT) 7821 self._match_text_seq("MATCHED") 7822 source = ( 7823 False 7824 if self._match_text_seq("BY", "TARGET") 7825 else self._match_text_seq("BY", "SOURCE") 7826 ) 7827 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7828 7829 self._match(TokenType.THEN) 7830 7831 if self._match(TokenType.INSERT): 7832 this = self._parse_star() 7833 if this: 7834 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7835 
else: 7836 then = self.expression( 7837 exp.Insert, 7838 this=exp.var("ROW") 7839 if self._match_text_seq("ROW") 7840 else self._parse_value(values=False), 7841 expression=self._match_text_seq("VALUES") and self._parse_value(), 7842 ) 7843 elif self._match(TokenType.UPDATE): 7844 expressions = self._parse_star() 7845 if expressions: 7846 then = self.expression(exp.Update, expressions=expressions) 7847 else: 7848 then = self.expression( 7849 exp.Update, 7850 expressions=self._match(TokenType.SET) 7851 and self._parse_csv(self._parse_equality), 7852 ) 7853 elif self._match(TokenType.DELETE): 7854 then = self.expression(exp.Var, this=self._prev.text) 7855 else: 7856 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7857 7858 whens.append( 7859 self.expression( 7860 exp.When, 7861 matched=matched, 7862 source=source, 7863 condition=condition, 7864 then=then, 7865 ) 7866 ) 7867 return self.expression(exp.Whens, expressions=whens) 7868 7869 def _parse_show(self) -> t.Optional[exp.Expression]: 7870 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7871 if parser: 7872 return parser(self) 7873 return self._parse_as_command(self._prev) 7874 7875 def _parse_set_item_assignment( 7876 self, kind: t.Optional[str] = None 7877 ) -> t.Optional[exp.Expression]: 7878 index = self._index 7879 7880 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7881 return self._parse_set_transaction(global_=kind == "GLOBAL") 7882 7883 left = self._parse_primary() or self._parse_column() 7884 assignment_delimiter = self._match_texts(("=", "TO")) 7885 7886 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7887 self._retreat(index) 7888 return None 7889 7890 right = self._parse_statement() or self._parse_id_var() 7891 if isinstance(right, (exp.Column, exp.Identifier)): 7892 right = exp.var(right.name) 7893 7894 this = self.expression(exp.EQ, this=left, expression=right) 7895 return self.expression(exp.SetItem, this=this, kind=kind) 7896 7897 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7898 self._match_text_seq("TRANSACTION") 7899 characteristics = self._parse_csv( 7900 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7901 ) 7902 return self.expression( 7903 exp.SetItem, 7904 expressions=characteristics, 7905 kind="TRANSACTION", 7906 **{"global": global_}, # type: ignore 7907 ) 7908 7909 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7910 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7911 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7912 7913 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7914 index = self._index 7915 set_ = self.expression( 7916 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7917 ) 7918 7919 if self._curr: 7920 self._retreat(index) 7921 return self._parse_as_command(self._prev) 7922 7923 return set_ 7924 7925 def _parse_var_from_options( 7926 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7927 ) -> t.Optional[exp.Var]: 7928 start = self._curr 7929 if not start: 7930 return None 7931 7932 option = start.text.upper() 7933 continuations = options.get(option) 7934 7935 index = self._index 7936 self._advance() 7937 for keywords in continuations or []: 7938 if isinstance(keywords, str): 7939 keywords = (keywords,) 7940 7941 if self._match_text_seq(*keywords): 7942 option = f"{option} {' '.join(keywords)}" 7943 break 7944 else: 7945 if 
continuations or continuations is None: 7946 if raise_unmatched: 7947 self.raise_error(f"Unknown option {option}") 7948 7949 self._retreat(index) 7950 return None 7951 7952 return exp.var(option) 7953 7954 def _parse_as_command(self, start: Token) -> exp.Command: 7955 while self._curr: 7956 self._advance() 7957 text = self._find_sql(start, self._prev) 7958 size = len(start.text) 7959 self._warn_unsupported() 7960 return exp.Command(this=text[:size], expression=text[size:]) 7961 7962 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7963 settings = [] 7964 7965 self._match_l_paren() 7966 kind = self._parse_id_var() 7967 7968 if self._match(TokenType.L_PAREN): 7969 while True: 7970 key = self._parse_id_var() 7971 value = self._parse_primary() 7972 if not key and value is None: 7973 break 7974 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7975 self._match(TokenType.R_PAREN) 7976 7977 self._match_r_paren() 7978 7979 return self.expression( 7980 exp.DictProperty, 7981 this=this, 7982 kind=kind.this if kind else None, 7983 settings=settings, 7984 ) 7985 7986 def _parse_dict_range(self, this: str) -> exp.DictRange: 7987 self._match_l_paren() 7988 has_min = self._match_text_seq("MIN") 7989 if has_min: 7990 min = self._parse_var() or self._parse_primary() 7991 self._match_text_seq("MAX") 7992 max = self._parse_var() or self._parse_primary() 7993 else: 7994 max = self._parse_var() or self._parse_primary() 7995 min = exp.Literal.number(0) 7996 self._match_r_paren() 7997 return self.expression(exp.DictRange, this=this, min=min, max=max) 7998 7999 def _parse_comprehension( 8000 self, this: t.Optional[exp.Expression] 8001 ) -> t.Optional[exp.Comprehension]: 8002 index = self._index 8003 expression = self._parse_column() 8004 if not self._match(TokenType.IN): 8005 self._retreat(index - 1) 8006 return None 8007 iterator = self._parse_column() 8008 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8009 return self.expression( 8010 exp.Comprehension, 8011 this=this, 8012 expression=expression, 8013 iterator=iterator, 8014 condition=condition, 8015 ) 8016 8017 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8018 if self._match(TokenType.HEREDOC_STRING): 8019 return self.expression(exp.Heredoc, this=self._prev.text) 8020 8021 if not self._match_text_seq("$"): 8022 return None 8023 8024 tags = ["$"] 8025 tag_text = None 8026 8027 if self._is_connected(): 8028 self._advance() 8029 tags.append(self._prev.text.upper()) 8030 else: 8031 self.raise_error("No closing $ found") 8032 8033 if tags[-1] != "$": 8034 if self._is_connected() and self._match_text_seq("$"): 8035 tag_text = tags[-1] 8036 tags.append("$") 8037 else: 8038 self.raise_error("No closing $ found") 8039 8040 heredoc_start = self._curr 8041 8042 while self._curr: 8043 if self._match_text_seq(*tags, advance=False): 8044 this = self._find_sql(heredoc_start, self._prev) 8045 self._advance(len(tags)) 8046 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8047 8048 self._advance() 8049 8050 self.raise_error(f"No closing {''.join(tags)} found") 8051 return None 8052 8053 def _find_parser( 8054 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8055 ) -> t.Optional[t.Callable]: 8056 if not self._curr: 8057 return None 8058 8059 index = self._index 8060 this = [] 8061 while True: 8062 # The current token might be multiple words 8063 curr = self._curr.text.upper() 8064 key = curr.split(" ") 8065 this.append(curr) 8066 8067 self._advance() 8068 result, trie = in_trie(trie, 
key) 8069 if result == TrieResult.FAILED: 8070 break 8071 8072 if result == TrieResult.EXISTS: 8073 subparser = parsers[" ".join(this)] 8074 return subparser 8075 8076 self._retreat(index) 8077 return None 8078 8079 def _match(self, token_type, advance=True, expression=None): 8080 if not self._curr: 8081 return None 8082 8083 if self._curr.token_type == token_type: 8084 if advance: 8085 self._advance() 8086 self._add_comments(expression) 8087 return True 8088 8089 return None 8090 8091 def _match_set(self, types, advance=True): 8092 if not self._curr: 8093 return None 8094 8095 if self._curr.token_type in types: 8096 if advance: 8097 self._advance() 8098 return True 8099 8100 return None 8101 8102 def _match_pair(self, token_type_a, token_type_b, advance=True): 8103 if not self._curr or not self._next: 8104 return None 8105 8106 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8107 if advance: 8108 self._advance(2) 8109 return True 8110 8111 return None 8112 8113 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8114 if not self._match(TokenType.L_PAREN, expression=expression): 8115 self.raise_error("Expecting (") 8116 8117 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8118 if not self._match(TokenType.R_PAREN, expression=expression): 8119 self.raise_error("Expecting )") 8120 8121 def _match_texts(self, texts, advance=True): 8122 if ( 8123 self._curr 8124 and self._curr.token_type != TokenType.STRING 8125 and self._curr.text.upper() in texts 8126 ): 8127 if advance: 8128 self._advance() 8129 return True 8130 return None 8131 8132 def _match_text_seq(self, *texts, advance=True): 8133 index = self._index 8134 for text in texts: 8135 if ( 8136 self._curr 8137 and self._curr.token_type != TokenType.STRING 8138 and self._curr.text.upper() == text 8139 ): 8140 self._advance() 8141 else: 8142 self._retreat(index) 8143 return None 8144 8145 if not advance: 8146 self._retreat(index) 8147 8148 return True 8149 8150 def _replace_lambda( 8151 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8152 ) -> t.Optional[exp.Expression]: 8153 if not node: 8154 return node 8155 8156 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8157 8158 for column in node.find_all(exp.Column): 8159 typ = lambda_types.get(column.parts[0].name) 8160 if typ is not None: 8161 dot_or_id = column.to_dot() if column.table else column.this 8162 8163 if typ: 8164 dot_or_id = self.expression( 8165 exp.Cast, 8166 this=dot_or_id, 8167 to=typ, 8168 ) 8169 8170 parent = column.parent 8171 8172 while isinstance(parent, exp.Dot): 8173 if not isinstance(parent.parent, exp.Dot): 8174 parent.replace(dot_or_id) 8175 break 8176 parent = parent.parent 8177 else: 8178 if column is node: 8179 node = dot_or_id 8180 else: 8181 column.replace(dot_or_id) 8182 return node 8183 8184 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8185 start = self._prev 8186 8187 # Not to be confused with TRUNCATE(number, decimals) function call 8188 if self._match(TokenType.L_PAREN): 8189 self._retreat(self._index - 2) 8190 return self._parse_function() 8191 8192 # Clickhouse supports TRUNCATE DATABASE as well 8193 is_database = self._match(TokenType.DATABASE) 8194 8195 self._match(TokenType.TABLE) 8196 8197 exists = self._parse_exists(not_=False) 8198 8199 expressions = self._parse_csv( 8200 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8201 ) 8202 8203 cluster = 
self._parse_on_property() if self._match(TokenType.ON) else None 8204 8205 if self._match_text_seq("RESTART", "IDENTITY"): 8206 identity = "RESTART" 8207 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8208 identity = "CONTINUE" 8209 else: 8210 identity = None 8211 8212 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8213 option = self._prev.text 8214 else: 8215 option = None 8216 8217 partition = self._parse_partition() 8218 8219 # Fallback case 8220 if self._curr: 8221 return self._parse_as_command(start) 8222 8223 return self.expression( 8224 exp.TruncateTable, 8225 expressions=expressions, 8226 is_database=is_database, 8227 exists=exists, 8228 cluster=cluster, 8229 identity=identity, 8230 option=option, 8231 partition=partition, 8232 ) 8233 8234 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8235 this = self._parse_ordered(self._parse_opclass) 8236 8237 if not self._match(TokenType.WITH): 8238 return this 8239 8240 op = self._parse_var(any_token=True) 8241 8242 return self.expression(exp.WithOperator, this=this, op=op) 8243 8244 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8245 self._match(TokenType.EQ) 8246 self._match(TokenType.L_PAREN) 8247 8248 opts: t.List[t.Optional[exp.Expression]] = [] 8249 option: exp.Expression | None 8250 while self._curr and not self._match(TokenType.R_PAREN): 8251 if self._match_text_seq("FORMAT_NAME", "="): 8252 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8253 option = self._parse_format_name() 8254 else: 8255 option = self._parse_property() 8256 8257 if option is None: 8258 self.raise_error("Unable to parse option") 8259 break 8260 8261 opts.append(option) 8262 8263 return opts 8264 8265 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8266 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8267 8268 options = [] 8269 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8270 option = self._parse_var(any_token=True) 8271 prev = self._prev.text.upper() 8272 8273 # Different dialects might separate options and values by white space, "=" and "AS" 8274 self._match(TokenType.EQ) 8275 self._match(TokenType.ALIAS) 8276 8277 param = self.expression(exp.CopyParameter, this=option) 8278 8279 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8280 TokenType.L_PAREN, advance=False 8281 ): 8282 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8283 param.set("expressions", self._parse_wrapped_options()) 8284 elif prev == "FILE_FORMAT": 8285 # T-SQL's external file format case 8286 param.set("expression", self._parse_field()) 8287 else: 8288 param.set("expression", self._parse_unquoted_field()) 8289 8290 options.append(param) 8291 self._match(sep) 8292 8293 return options 8294 8295 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8296 expr = self.expression(exp.Credentials) 8297 8298 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8299 expr.set("storage", self._parse_field()) 8300 if self._match_text_seq("CREDENTIALS"): 8301 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8302 creds = ( 8303 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8304 ) 8305 expr.set("credentials", creds) 8306 if self._match_text_seq("ENCRYPTION"): 8307 expr.set("encryption", self._parse_wrapped_options()) 8308 if self._match_text_seq("IAM_ROLE"): 8309 expr.set("iam_role", self._parse_field()) 8310 if self._match_text_seq("REGION"): 8311 
expr.set("region", self._parse_field()) 8312 8313 return expr 8314 8315 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8316 return self._parse_field() 8317 8318 def _parse_copy(self) -> exp.Copy | exp.Command: 8319 start = self._prev 8320 8321 self._match(TokenType.INTO) 8322 8323 this = ( 8324 self._parse_select(nested=True, parse_subquery_alias=False) 8325 if self._match(TokenType.L_PAREN, advance=False) 8326 else self._parse_table(schema=True) 8327 ) 8328 8329 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8330 8331 files = self._parse_csv(self._parse_file_location) 8332 credentials = self._parse_credentials() 8333 8334 self._match_text_seq("WITH") 8335 8336 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8337 8338 # Fallback case 8339 if self._curr: 8340 return self._parse_as_command(start) 8341 8342 return self.expression( 8343 exp.Copy, 8344 this=this, 8345 kind=kind, 8346 credentials=credentials, 8347 files=files, 8348 params=params, 8349 ) 8350 8351 def _parse_normalize(self) -> exp.Normalize: 8352 return self.expression( 8353 exp.Normalize, 8354 this=self._parse_bitwise(), 8355 form=self._match(TokenType.COMMA) and self._parse_var(), 8356 ) 8357 8358 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8359 args = self._parse_csv(lambda: self._parse_lambda()) 8360 8361 this = seq_get(args, 0) 8362 decimals = seq_get(args, 1) 8363 8364 return expr_type( 8365 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8366 ) 8367 8368 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8369 star_token = self._prev 8370 8371 if self._match_text_seq("COLUMNS", "(", advance=False): 8372 this = self._parse_function() 8373 if isinstance(this, exp.Columns): 8374 this.set("unpack", True) 8375 return this 8376 8377 return self.expression( 8378 exp.Star, 8379 **{ # type: ignore 8380 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8381 "replace": self._parse_star_op("REPLACE"), 8382 "rename": self._parse_star_op("RENAME"), 8383 }, 8384 ).update_positions(star_token) 8385 8386 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8387 privilege_parts = [] 8388 8389 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8390 # (end of privilege list) or L_PAREN (start of column list) are met 8391 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8392 privilege_parts.append(self._curr.text.upper()) 8393 self._advance() 8394 8395 this = exp.var(" ".join(privilege_parts)) 8396 expressions = ( 8397 self._parse_wrapped_csv(self._parse_column) 8398 if self._match(TokenType.L_PAREN, advance=False) 8399 else None 8400 ) 8401 8402 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8403 8404 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8405 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8406 principal = self._parse_id_var() 8407 8408 if not principal: 8409 return None 8410 8411 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8412 8413 def _parse_grant(self) -> exp.Grant | exp.Command: 8414 start = self._prev 8415 8416 privileges = self._parse_csv(self._parse_grant_privilege) 8417 8418 self._match(TokenType.ON) 8419 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8420 8421 # Attempt to parse the securable e.g. 
MySQL allows names 8422 # such as "foo.*", "*.*" which are not easily parseable yet 8423 securable = self._try_parse(self._parse_table_parts) 8424 8425 if not securable or not self._match_text_seq("TO"): 8426 return self._parse_as_command(start) 8427 8428 principals = self._parse_csv(self._parse_grant_principal) 8429 8430 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8431 8432 if self._curr: 8433 return self._parse_as_command(start) 8434 8435 return self.expression( 8436 exp.Grant, 8437 privileges=privileges, 8438 kind=kind, 8439 securable=securable, 8440 principals=principals, 8441 grant_option=grant_option, 8442 ) 8443 8444 def _parse_overlay(self) -> exp.Overlay: 8445 return self.expression( 8446 exp.Overlay, 8447 **{ # type: ignore 8448 "this": self._parse_bitwise(), 8449 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8450 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8451 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8452 }, 8453 ) 8454 8455 def _parse_format_name(self) -> exp.Property: 8456 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8457 # for FILE_FORMAT = <format_name> 8458 return self.expression( 8459 exp.Property, 8460 this=exp.var("FORMAT_NAME"), 8461 value=self._parse_string() or self._parse_table_parts(), 8462 ) 8463 8464 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8465 args: t.List[exp.Expression] = [] 8466 8467 if self._match(TokenType.DISTINCT): 8468 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8469 self._match(TokenType.COMMA) 8470 8471 args.extend(self._parse_csv(self._parse_assignment)) 8472 8473 return self.expression( 8474 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8475 ) 8476 8477 def _identifier_expression( 8478 self, token: t.Optional[Token] = None, **kwargs: t.Any 8479 ) -> exp.Identifier: 8480 token = token or self._prev 8481 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8482 expression.update_positions(token) 8483 return expression 8484 8485 def _build_pipe_cte( 8486 self, 8487 query: exp.Query, 8488 expressions: t.List[exp.Expression], 8489 alias_cte: t.Optional[exp.TableAlias] = None, 8490 ) -> exp.Select: 8491 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8492 if alias_cte: 8493 new_cte = alias_cte 8494 else: 8495 self._pipe_cte_counter += 1 8496 new_cte = f"__tmp{self._pipe_cte_counter}" 8497 8498 with_ = query.args.get("with") 8499 ctes = with_.pop() if with_ else None 8500 8501 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8502 if ctes: 8503 new_select.set("with", ctes) 8504 8505 return new_select.with_(new_cte, as_=query, copy=False) 8506 8507 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8508 select = self._parse_select(consume_pipe=False) 8509 if not select: 8510 return query 8511 8512 return self._build_pipe_cte( 8513 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8514 ) 8515 8516 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8517 limit = self._parse_limit() 8518 offset = self._parse_offset() 8519 if limit: 8520 curr_limit = query.args.get("limit", limit) 8521 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8522 query.limit(limit, copy=False) 8523 if offset: 8524 curr_offset = query.args.get("offset") 8525 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 
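# --- Illustrative note, not part of the parser source ---
# Successive |> LIMIT / |> OFFSET pipe stages are folded into the current query: the
# smaller LIMIT wins (see the to_py() comparison above) and OFFSET values are summed
# in the call below. A hedged example (BigQuery-style pipe syntax, public API only):
#   FROM t |> LIMIT 10 |> LIMIT 5 |> OFFSET 2
# is expected to end up with LIMIT 5 and OFFSET 2 on the resulting SELECT.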
8526 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8527 8528 return query 8529 8530 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8531 this = self._parse_assignment() 8532 if self._match_text_seq("GROUP", "AND", advance=False): 8533 return this 8534 8535 this = self._parse_alias(this) 8536 8537 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8538 return self._parse_ordered(lambda: this) 8539 8540 return this 8541 8542 def _parse_pipe_syntax_aggregate_group_order_by( 8543 self, query: exp.Select, group_by_exists: bool = True 8544 ) -> exp.Select: 8545 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8546 aggregates_or_groups, orders = [], [] 8547 for element in expr: 8548 if isinstance(element, exp.Ordered): 8549 this = element.this 8550 if isinstance(this, exp.Alias): 8551 element.set("this", this.args["alias"]) 8552 orders.append(element) 8553 else: 8554 this = element 8555 aggregates_or_groups.append(this) 8556 8557 if group_by_exists: 8558 query.select(*aggregates_or_groups, copy=False).group_by( 8559 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8560 copy=False, 8561 ) 8562 else: 8563 query.select(*aggregates_or_groups, append=False, copy=False) 8564 8565 if orders: 8566 return query.order_by(*orders, append=False, copy=False) 8567 8568 return query 8569 8570 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8571 self._match_text_seq("AGGREGATE") 8572 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8573 8574 if self._match(TokenType.GROUP_BY) or ( 8575 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8576 ): 8577 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8578 8579 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8580 8581 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8582 first_setop = self.parse_set_operation(this=query) 8583 if not first_setop: 8584 return None 8585 8586 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8587 expr = self._parse_paren() 8588 return expr.assert_is(exp.Subquery).unnest() if expr else None 8589 8590 first_setop.this.pop() 8591 8592 setops = [ 8593 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8594 *self._parse_csv(_parse_and_unwrap_query), 8595 ] 8596 8597 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8598 with_ = query.args.get("with") 8599 ctes = with_.pop() if with_ else None 8600 8601 if isinstance(first_setop, exp.Union): 8602 query = query.union(*setops, copy=False, **first_setop.args) 8603 elif isinstance(first_setop, exp.Except): 8604 query = query.except_(*setops, copy=False, **first_setop.args) 8605 else: 8606 query = query.intersect(*setops, copy=False, **first_setop.args) 8607 8608 query.set("with", ctes) 8609 8610 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8611 8612 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8613 join = self._parse_join() 8614 if not join: 8615 return None 8616 8617 if isinstance(query, exp.Select): 8618 return query.join(join, copy=False) 8619 8620 return query 8621 8622 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8623 pivots = self._parse_pivots() 8624 if not pivots: 8625 return query 8626 8627 from_ = query.args.get("from") 8628 if from_: 8629 from_.this.set("pivots", pivots) 8630 8631 
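# --- Illustrative note, not part of the parser source ---
# As with the other pipe operators above, the PIVOT stage finishes by handing the
# accumulated query to _build_pipe_cte, which wraps it in a fresh CTE (named __tmp<N>)
# and selects * from it, so that later |> stages always compose over a plain SELECT.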
return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8632 8633 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8634 self._match_text_seq("EXTEND") 8635 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8636 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8637 8638 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8639 sample = self._parse_table_sample() 8640 8641 with_ = query.args.get("with") 8642 if with_: 8643 with_.expressions[-1].this.set("sample", sample) 8644 else: 8645 query.set("sample", sample) 8646 8647 return query 8648 8649 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8650 if isinstance(query, exp.Subquery): 8651 query = exp.select("*").from_(query, copy=False) 8652 8653 if not query.args.get("from"): 8654 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8655 8656 while self._match(TokenType.PIPE_GT): 8657 start = self._curr 8658 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8659 if not parser: 8660 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8661 # keywords, making it tricky to disambiguate them without lookahead. The approach 8662 # here is to try and parse a set operation and if that fails, then try to parse a 8663 # join operator. If that fails as well, then the operator is not supported. 8664 parsed_query = self._parse_pipe_syntax_set_operator(query) 8665 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8666 if not parsed_query: 8667 self._retreat(start) 8668 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8669 break 8670 query = parsed_query 8671 else: 8672 query = parser(self, query) 8673 8674 return query 8675 8676 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8677 vars = self._parse_csv(self._parse_id_var) 8678 if not vars: 8679 return None 8680 8681 return self.expression( 8682 exp.DeclareItem, 8683 this=vars, 8684 kind=self._parse_types(), 8685 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8686 ) 8687 8688 def _parse_declare(self) -> exp.Declare | exp.Command: 8689 start = self._prev 8690 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8691 8692 if not expressions or self._curr: 8693 return self._parse_as_command(start) 8694 8695 return self.expression(exp.Declare, expressions=expressions) 8696 8697 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8698 exp_class = exp.Cast if strict else exp.TryCast 8699 8700 if exp_class == exp.TryCast: 8701 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8702 8703 return self.expression(exp_class, **kwargs) 8704 8705 def _parse_json_value(self) -> exp.JSONValue: 8706 this = self._parse_bitwise() 8707 self._match(TokenType.COMMA) 8708 path = self._parse_bitwise() 8709 8710 returning = self._match(TokenType.RETURNING) and self._parse_type() 8711 8712 return self.expression( 8713 exp.JSONValue, 8714 this=this, 8715 path=self.dialect.to_json_path(path), 8716 returning=returning, 8717 on_condition=self._parse_on_condition(), 8718 ) 8719 8720 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8721 def concat_exprs( 8722 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8723 ) -> exp.Expression: 8724 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8725 concat_exprs = [ 8726 self.expression(exp.Concat, 
expressions=node.expressions, safe=True) 8727 ] 8728 node.set("expressions", concat_exprs) 8729 return node 8730 if len(exprs) == 1: 8731 return exprs[0] 8732 return self.expression(exp.Concat, expressions=exprs, safe=True) 8733 8734 args = self._parse_csv(self._parse_lambda) 8735 8736 if args: 8737 order = args[-1] if isinstance(args[-1], exp.Order) else None 8738 8739 if order: 8740 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8741 # remove 'expr' from exp.Order and add it back to args 8742 args[-1] = order.this 8743 order.set("this", concat_exprs(order.this, args)) 8744 8745 this = order or concat_exprs(args[0], args) 8746 else: 8747 this = None 8748 8749 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8750 8751 return self.expression(exp.GroupConcat, this=this, separator=separator)
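As a rough usage sketch of the GROUP_CONCAT path above (assuming the public sqlglot.parse_one entry point and the MySQL dialect, which routes GROUP_CONCAT through _parse_group_concat):

import sqlglot
from sqlglot import exp

# Illustrative only: the ORDER BY is folded back into `this`, the SEPARATOR is kept separately.
select = sqlglot.parse_one(
    "SELECT GROUP_CONCAT(DISTINCT a ORDER BY b SEPARATOR ', ') FROM t",
    read="mysql",
)
agg = select.find(exp.GroupConcat)
assert agg is not None
print(agg.args.get("separator"))  # the ', ' string literal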
32def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 33 if len(args) == 1 and args[0].is_star: 34 return exp.StarMap(this=args[0]) 35 36 keys = [] 37 values = [] 38 for i in range(0, len(args), 2): 39 keys.append(args[i]) 40 values.append(args[i + 1]) 41 42 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
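A small sketch of the alternating key/value handling above (direct call; the literal values are arbitrary):

from sqlglot import exp
from sqlglot.parser import build_var_map

# VAR_MAP('a', 1, 'b', 2): keys and values alternate in the argument list.
args = [
    exp.Literal.string("a"), exp.Literal.number(1),
    exp.Literal.string("b"), exp.Literal.number(2),
]
assert isinstance(build_var_map(args), exp.VarMap)

# A single star argument short-circuits to StarMap, i.e. VAR_MAP(*).
assert isinstance(build_var_map([exp.Star()]), exp.StarMap)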
50def binary_range_parser( 51 expr_type: t.Type[exp.Expression], reverse_args: bool = False 52) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 53 def _parse_binary_range( 54 self: Parser, this: t.Optional[exp.Expression] 55 ) -> t.Optional[exp.Expression]: 56 expression = self._parse_bitwise() 57 if reverse_args: 58 this, expression = expression, this 59 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 60 61 return _parse_binary_range
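The factory needs a live Parser to do anything, so the easiest way to see its effect is through an operator it backs in RANGE_PARSERS further down, e.g. GLOB (a sketch, assuming the default dialect keeps the base mapping):

import sqlglot
from sqlglot import exp

# RANGE_PARSERS wires TokenType.GLOB to binary_range_parser(exp.Glob): the right-hand
# operand is parsed with _parse_bitwise and the pair is wrapped into exp.Glob.
query = sqlglot.parse_one("SELECT * FROM t WHERE x GLOB 'a*'")
glob = query.find(exp.Glob)
assert glob is not None and glob.this.name == "x"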
64def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 65 # Default argument order is base, expression 66 this = seq_get(args, 0) 67 expression = seq_get(args, 1) 68 69 if expression: 70 if not dialect.LOG_BASE_FIRST: 71 this, expression = expression, this 72 return exp.Log(this=this, expression=expression) 73 74 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
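A sketch of both branches (direct call with the default Dialect; LOG_BASE_FIRST and LOG_DEFAULTS_TO_LN are the knobs the builder consults):

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.parser import build_logarithm

dialect = Dialect.get_or_raise(None)  # default dialect, LOG_BASE_FIRST is truthy

# Two arguments: LOG(10, x) keeps the base-first order for this dialect.
log = build_logarithm([exp.Literal.number(10), exp.column("x")], dialect)
assert isinstance(log, exp.Log) and log.this.to_py() == 10

# One argument: plain LOG(x); dialects with LOG_DEFAULTS_TO_LN produce exp.Ln instead.
assert isinstance(build_logarithm([exp.column("x")], dialect), exp.Log)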
94def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 95 def _builder(args: t.List, dialect: Dialect) -> E: 96 expression = expr_type( 97 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 98 ) 99 if len(args) > 2 and expr_type is exp.JSONExtract: 100 expression.set("expressions", args[2:]) 101 102 return expression 103 104 return _builder
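A sketch of the resulting tree, assuming a dialect whose JSON_EXTRACT is routed through this builder (MySQL is used for illustration):

import sqlglot
from sqlglot import exp

extract = sqlglot.parse_one(
    "SELECT JSON_EXTRACT(doc, '$.a.b') FROM t", read="mysql"
).find(exp.JSONExtract)
assert extract is not None
# dialect.to_json_path turns the raw string into a structured JSONPath expression.
assert isinstance(extract.expression, exp.JSONPath)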
107def build_mod(args: t.List) -> exp.Mod: 108 this = seq_get(args, 0) 109 expression = seq_get(args, 1) 110 111 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 112 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 113 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 114 115 return exp.Mod(this=this, expression=expression)
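A short sketch of the parenthesization the comment describes, via the public parse_one/sql round-trip:

import sqlglot

# MOD(a + 1, 7) is lowered to (a + 1) % 7; the Paren keeps the precedence intact.
print(sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql())
# expected (roughly): SELECT (a + 1) % 7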
127def build_array_constructor( 128 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 129) -> exp.Expression: 130 array_exp = exp_class(expressions=args) 131 132 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 133 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 134 135 return array_exp
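A direct-call sketch (the bracket kind only matters for dialects that set HAS_DISTINCT_ARRAY_CONSTRUCTORS; the default dialect is used here just to show the shape):

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.parser import build_array_constructor
from sqlglot.tokens import TokenType

arr = build_array_constructor(
    exp.Array,
    [exp.Literal.number(1), exp.Literal.number(2)],
    TokenType.L_BRACKET,
    Dialect.get_or_raise(None),
)
assert isinstance(arr, exp.Array)
# bracket_notation is only recorded when the dialect distinguishes ARRAY[...] from [...].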
138def build_convert_timezone( 139 args: t.List, default_source_tz: t.Optional[str] = None 140) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 141 if len(args) == 2: 142 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 143 return exp.ConvertTimezone( 144 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 145 ) 146 147 return exp.ConvertTimezone.from_arg_list(args)
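A sketch of the two-argument fallback (direct call; 'UTC' is just an example default source timezone):

from sqlglot import exp
from sqlglot.parser import build_convert_timezone

node = build_convert_timezone(
    [exp.Literal.string("America/New_York"), exp.column("ts")],
    default_source_tz="UTC",
)
assert isinstance(node, exp.ConvertTimezone)
assert node.args["source_tz"].name == "UTC"  # filled in from the default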
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOGRAPHYPOINT, 385 TokenType.GEOMETRY, 386 TokenType.POINT, 387 TokenType.RING, 388 TokenType.LINESTRING, 389 TokenType.MULTILINESTRING, 390 TokenType.POLYGON, 391 TokenType.MULTIPOLYGON, 392 TokenType.HLLSKETCH, 393 TokenType.HSTORE, 394 TokenType.PSEUDO_TYPE, 395 TokenType.SUPER, 396 TokenType.SERIAL, 397 TokenType.SMALLSERIAL, 398 TokenType.BIGSERIAL, 399 TokenType.XML, 400 TokenType.YEAR, 401 TokenType.USERDEFINED, 402 TokenType.MONEY, 403 TokenType.SMALLMONEY, 404 TokenType.ROWVERSION, 405 TokenType.IMAGE, 406 TokenType.VARIANT, 407 
TokenType.VECTOR, 408 TokenType.VOID, 409 TokenType.OBJECT, 410 TokenType.OBJECT_IDENTIFIER, 411 TokenType.INET, 412 TokenType.IPADDRESS, 413 TokenType.IPPREFIX, 414 TokenType.IPV4, 415 TokenType.IPV6, 416 TokenType.UNKNOWN, 417 TokenType.NOTHING, 418 TokenType.NULL, 419 TokenType.NAME, 420 TokenType.TDIGEST, 421 TokenType.DYNAMIC, 422 *ENUM_TYPE_TOKENS, 423 *NESTED_TYPE_TOKENS, 424 *AGGREGATE_TYPE_TOKENS, 425 } 426 427 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 428 TokenType.BIGINT: TokenType.UBIGINT, 429 TokenType.INT: TokenType.UINT, 430 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 431 TokenType.SMALLINT: TokenType.USMALLINT, 432 TokenType.TINYINT: TokenType.UTINYINT, 433 TokenType.DECIMAL: TokenType.UDECIMAL, 434 TokenType.DOUBLE: TokenType.UDOUBLE, 435 } 436 437 SUBQUERY_PREDICATES = { 438 TokenType.ANY: exp.Any, 439 TokenType.ALL: exp.All, 440 TokenType.EXISTS: exp.Exists, 441 TokenType.SOME: exp.Any, 442 } 443 444 RESERVED_TOKENS = { 445 *Tokenizer.SINGLE_TOKENS.values(), 446 TokenType.SELECT, 447 } - {TokenType.IDENTIFIER} 448 449 DB_CREATABLES = { 450 TokenType.DATABASE, 451 TokenType.DICTIONARY, 452 TokenType.FILE_FORMAT, 453 TokenType.MODEL, 454 TokenType.NAMESPACE, 455 TokenType.SCHEMA, 456 TokenType.SEMANTIC_VIEW, 457 TokenType.SEQUENCE, 458 TokenType.SINK, 459 TokenType.SOURCE, 460 TokenType.STAGE, 461 TokenType.STORAGE_INTEGRATION, 462 TokenType.STREAMLIT, 463 TokenType.TABLE, 464 TokenType.TAG, 465 TokenType.VIEW, 466 TokenType.WAREHOUSE, 467 } 468 469 CREATABLES = { 470 TokenType.COLUMN, 471 TokenType.CONSTRAINT, 472 TokenType.FOREIGN_KEY, 473 TokenType.FUNCTION, 474 TokenType.INDEX, 475 TokenType.PROCEDURE, 476 *DB_CREATABLES, 477 } 478 479 ALTERABLES = { 480 TokenType.INDEX, 481 TokenType.TABLE, 482 TokenType.VIEW, 483 } 484 485 # Tokens that can represent identifiers 486 ID_VAR_TOKENS = { 487 TokenType.ALL, 488 TokenType.ATTACH, 489 TokenType.VAR, 490 TokenType.ANTI, 491 TokenType.APPLY, 492 TokenType.ASC, 493 TokenType.ASOF, 494 TokenType.AUTO_INCREMENT, 495 TokenType.BEGIN, 496 TokenType.BPCHAR, 497 TokenType.CACHE, 498 TokenType.CASE, 499 TokenType.COLLATE, 500 TokenType.COMMAND, 501 TokenType.COMMENT, 502 TokenType.COMMIT, 503 TokenType.CONSTRAINT, 504 TokenType.COPY, 505 TokenType.CUBE, 506 TokenType.CURRENT_SCHEMA, 507 TokenType.DEFAULT, 508 TokenType.DELETE, 509 TokenType.DESC, 510 TokenType.DESCRIBE, 511 TokenType.DETACH, 512 TokenType.DICTIONARY, 513 TokenType.DIV, 514 TokenType.END, 515 TokenType.EXECUTE, 516 TokenType.EXPORT, 517 TokenType.ESCAPE, 518 TokenType.FALSE, 519 TokenType.FIRST, 520 TokenType.FILTER, 521 TokenType.FINAL, 522 TokenType.FORMAT, 523 TokenType.FULL, 524 TokenType.GET, 525 TokenType.IDENTIFIER, 526 TokenType.IS, 527 TokenType.ISNULL, 528 TokenType.INTERVAL, 529 TokenType.KEEP, 530 TokenType.KILL, 531 TokenType.LEFT, 532 TokenType.LIMIT, 533 TokenType.LOAD, 534 TokenType.MERGE, 535 TokenType.NATURAL, 536 TokenType.NEXT, 537 TokenType.OFFSET, 538 TokenType.OPERATOR, 539 TokenType.ORDINALITY, 540 TokenType.OVERLAPS, 541 TokenType.OVERWRITE, 542 TokenType.PARTITION, 543 TokenType.PERCENT, 544 TokenType.PIVOT, 545 TokenType.PRAGMA, 546 TokenType.PUT, 547 TokenType.RANGE, 548 TokenType.RECURSIVE, 549 TokenType.REFERENCES, 550 TokenType.REFRESH, 551 TokenType.RENAME, 552 TokenType.REPLACE, 553 TokenType.RIGHT, 554 TokenType.ROLLUP, 555 TokenType.ROW, 556 TokenType.ROWS, 557 TokenType.SEMI, 558 TokenType.SET, 559 TokenType.SETTINGS, 560 TokenType.SHOW, 561 TokenType.TEMPORARY, 562 TokenType.TOP, 563 TokenType.TRUE, 564 TokenType.TRUNCATE, 565 
TokenType.UNIQUE, 566 TokenType.UNNEST, 567 TokenType.UNPIVOT, 568 TokenType.UPDATE, 569 TokenType.USE, 570 TokenType.VOLATILE, 571 TokenType.WINDOW, 572 *CREATABLES, 573 *SUBQUERY_PREDICATES, 574 *TYPE_TOKENS, 575 *NO_PAREN_FUNCTIONS, 576 } 577 ID_VAR_TOKENS.remove(TokenType.UNION) 578 579 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 580 TokenType.ANTI, 581 TokenType.ASOF, 582 TokenType.FULL, 583 TokenType.LEFT, 584 TokenType.LOCK, 585 TokenType.NATURAL, 586 TokenType.RIGHT, 587 TokenType.SEMI, 588 TokenType.WINDOW, 589 } 590 591 ALIAS_TOKENS = ID_VAR_TOKENS 592 593 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 594 595 ARRAY_CONSTRUCTORS = { 596 "ARRAY": exp.Array, 597 "LIST": exp.List, 598 } 599 600 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 601 602 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 603 604 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 605 606 FUNC_TOKENS = { 607 TokenType.COLLATE, 608 TokenType.COMMAND, 609 TokenType.CURRENT_DATE, 610 TokenType.CURRENT_DATETIME, 611 TokenType.CURRENT_SCHEMA, 612 TokenType.CURRENT_TIMESTAMP, 613 TokenType.CURRENT_TIME, 614 TokenType.CURRENT_USER, 615 TokenType.FILTER, 616 TokenType.FIRST, 617 TokenType.FORMAT, 618 TokenType.GET, 619 TokenType.GLOB, 620 TokenType.IDENTIFIER, 621 TokenType.INDEX, 622 TokenType.ISNULL, 623 TokenType.ILIKE, 624 TokenType.INSERT, 625 TokenType.LIKE, 626 TokenType.MERGE, 627 TokenType.NEXT, 628 TokenType.OFFSET, 629 TokenType.PRIMARY_KEY, 630 TokenType.RANGE, 631 TokenType.REPLACE, 632 TokenType.RLIKE, 633 TokenType.ROW, 634 TokenType.UNNEST, 635 TokenType.VAR, 636 TokenType.LEFT, 637 TokenType.RIGHT, 638 TokenType.SEQUENCE, 639 TokenType.DATE, 640 TokenType.DATETIME, 641 TokenType.TABLE, 642 TokenType.TIMESTAMP, 643 TokenType.TIMESTAMPTZ, 644 TokenType.TRUNCATE, 645 TokenType.WINDOW, 646 TokenType.XOR, 647 *TYPE_TOKENS, 648 *SUBQUERY_PREDICATES, 649 } 650 651 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 652 TokenType.AND: exp.And, 653 } 654 655 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 656 TokenType.COLON_EQ: exp.PropertyEQ, 657 } 658 659 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 660 TokenType.OR: exp.Or, 661 } 662 663 EQUALITY = { 664 TokenType.EQ: exp.EQ, 665 TokenType.NEQ: exp.NEQ, 666 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 667 } 668 669 COMPARISON = { 670 TokenType.GT: exp.GT, 671 TokenType.GTE: exp.GTE, 672 TokenType.LT: exp.LT, 673 TokenType.LTE: exp.LTE, 674 } 675 676 BITWISE = { 677 TokenType.AMP: exp.BitwiseAnd, 678 TokenType.CARET: exp.BitwiseXor, 679 TokenType.PIPE: exp.BitwiseOr, 680 } 681 682 TERM = { 683 TokenType.DASH: exp.Sub, 684 TokenType.PLUS: exp.Add, 685 TokenType.MOD: exp.Mod, 686 TokenType.COLLATE: exp.Collate, 687 } 688 689 FACTOR = { 690 TokenType.DIV: exp.IntDiv, 691 TokenType.LR_ARROW: exp.Distance, 692 TokenType.SLASH: exp.Div, 693 TokenType.STAR: exp.Mul, 694 } 695 696 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 697 698 TIMES = { 699 TokenType.TIME, 700 TokenType.TIMETZ, 701 } 702 703 TIMESTAMPS = { 704 TokenType.TIMESTAMP, 705 TokenType.TIMESTAMPNTZ, 706 TokenType.TIMESTAMPTZ, 707 TokenType.TIMESTAMPLTZ, 708 *TIMES, 709 } 710 711 SET_OPERATIONS = { 712 TokenType.UNION, 713 TokenType.INTERSECT, 714 TokenType.EXCEPT, 715 } 716 717 JOIN_METHODS = { 718 TokenType.ASOF, 719 TokenType.NATURAL, 720 TokenType.POSITIONAL, 721 } 722 723 JOIN_SIDES = { 724 TokenType.LEFT, 725 TokenType.RIGHT, 726 TokenType.FULL, 727 } 728 729 JOIN_KINDS = { 730 TokenType.ANTI, 731 TokenType.CROSS, 732 TokenType.INNER, 733 
TokenType.OUTER, 734 TokenType.SEMI, 735 TokenType.STRAIGHT_JOIN, 736 } 737 738 JOIN_HINTS: t.Set[str] = set() 739 740 LAMBDAS = { 741 TokenType.ARROW: lambda self, expressions: self.expression( 742 exp.Lambda, 743 this=self._replace_lambda( 744 self._parse_assignment(), 745 expressions, 746 ), 747 expressions=expressions, 748 ), 749 TokenType.FARROW: lambda self, expressions: self.expression( 750 exp.Kwarg, 751 this=exp.var(expressions[0].name), 752 expression=self._parse_assignment(), 753 ), 754 } 755 756 COLUMN_OPERATORS = { 757 TokenType.DOT: None, 758 TokenType.DOTCOLON: lambda self, this, to: self.expression( 759 exp.JSONCast, 760 this=this, 761 to=to, 762 ), 763 TokenType.DCOLON: lambda self, this, to: self.build_cast( 764 strict=self.STRICT_CAST, this=this, to=to 765 ), 766 TokenType.ARROW: lambda self, this, path: self.expression( 767 exp.JSONExtract, 768 this=this, 769 expression=self.dialect.to_json_path(path), 770 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 771 ), 772 TokenType.DARROW: lambda self, this, path: self.expression( 773 exp.JSONExtractScalar, 774 this=this, 775 expression=self.dialect.to_json_path(path), 776 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 777 ), 778 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 779 exp.JSONBExtract, 780 this=this, 781 expression=path, 782 ), 783 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 784 exp.JSONBExtractScalar, 785 this=this, 786 expression=path, 787 ), 788 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 789 exp.JSONBContains, 790 this=this, 791 expression=key, 792 ), 793 } 794 795 CAST_COLUMN_OPERATORS = { 796 TokenType.DOTCOLON, 797 TokenType.DCOLON, 798 } 799 800 EXPRESSION_PARSERS = { 801 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 802 exp.Column: lambda self: self._parse_column(), 803 exp.Condition: lambda self: self._parse_assignment(), 804 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 805 exp.Expression: lambda self: self._parse_expression(), 806 exp.From: lambda self: self._parse_from(joins=True), 807 exp.Group: lambda self: self._parse_group(), 808 exp.Having: lambda self: self._parse_having(), 809 exp.Hint: lambda self: self._parse_hint_body(), 810 exp.Identifier: lambda self: self._parse_id_var(), 811 exp.Join: lambda self: self._parse_join(), 812 exp.Lambda: lambda self: self._parse_lambda(), 813 exp.Lateral: lambda self: self._parse_lateral(), 814 exp.Limit: lambda self: self._parse_limit(), 815 exp.Offset: lambda self: self._parse_offset(), 816 exp.Order: lambda self: self._parse_order(), 817 exp.Ordered: lambda self: self._parse_ordered(), 818 exp.Properties: lambda self: self._parse_properties(), 819 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 820 exp.Qualify: lambda self: self._parse_qualify(), 821 exp.Returning: lambda self: self._parse_returning(), 822 exp.Select: lambda self: self._parse_select(), 823 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 824 exp.Table: lambda self: self._parse_table_parts(), 825 exp.TableAlias: lambda self: self._parse_table_alias(), 826 exp.Tuple: lambda self: self._parse_value(values=False), 827 exp.Whens: lambda self: self._parse_when_matched(), 828 exp.Where: lambda self: self._parse_where(), 829 exp.Window: lambda self: self._parse_named_window(), 830 exp.With: lambda self: self._parse_with(), 831 "JOIN_TYPE": lambda self: self._parse_join_parts(), 832 } 833 834 STATEMENT_PARSERS = { 835 
TokenType.ALTER: lambda self: self._parse_alter(), 836 TokenType.ANALYZE: lambda self: self._parse_analyze(), 837 TokenType.BEGIN: lambda self: self._parse_transaction(), 838 TokenType.CACHE: lambda self: self._parse_cache(), 839 TokenType.COMMENT: lambda self: self._parse_comment(), 840 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 841 TokenType.COPY: lambda self: self._parse_copy(), 842 TokenType.CREATE: lambda self: self._parse_create(), 843 TokenType.DELETE: lambda self: self._parse_delete(), 844 TokenType.DESC: lambda self: self._parse_describe(), 845 TokenType.DESCRIBE: lambda self: self._parse_describe(), 846 TokenType.DROP: lambda self: self._parse_drop(), 847 TokenType.GRANT: lambda self: self._parse_grant(), 848 TokenType.INSERT: lambda self: self._parse_insert(), 849 TokenType.KILL: lambda self: self._parse_kill(), 850 TokenType.LOAD: lambda self: self._parse_load(), 851 TokenType.MERGE: lambda self: self._parse_merge(), 852 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 853 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 854 TokenType.REFRESH: lambda self: self._parse_refresh(), 855 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 856 TokenType.SET: lambda self: self._parse_set(), 857 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 858 TokenType.UNCACHE: lambda self: self._parse_uncache(), 859 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 860 TokenType.UPDATE: lambda self: self._parse_update(), 861 TokenType.USE: lambda self: self._parse_use(), 862 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 863 } 864 865 UNARY_PARSERS = { 866 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 867 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 868 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 869 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 870 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 871 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 872 } 873 874 STRING_PARSERS = { 875 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 876 exp.RawString, this=token.text 877 ), 878 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 879 exp.National, this=token.text 880 ), 881 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 882 TokenType.STRING: lambda self, token: self.expression( 883 exp.Literal, this=token.text, is_string=True 884 ), 885 TokenType.UNICODE_STRING: lambda self, token: self.expression( 886 exp.UnicodeString, 887 this=token.text, 888 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 889 ), 890 } 891 892 NUMERIC_PARSERS = { 893 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 894 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 895 TokenType.HEX_STRING: lambda self, token: self.expression( 896 exp.HexString, 897 this=token.text, 898 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 899 ), 900 TokenType.NUMBER: lambda self, token: self.expression( 901 exp.Literal, this=token.text, is_string=False 902 ), 903 } 904 905 PRIMARY_PARSERS = { 906 **STRING_PARSERS, 907 **NUMERIC_PARSERS, 908 TokenType.INTRODUCER: lambda self, token: 
self._parse_introducer(token), 909 TokenType.NULL: lambda self, _: self.expression(exp.Null), 910 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 911 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 912 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 913 TokenType.STAR: lambda self, _: self._parse_star_ops(), 914 } 915 916 PLACEHOLDER_PARSERS = { 917 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 918 TokenType.PARAMETER: lambda self: self._parse_parameter(), 919 TokenType.COLON: lambda self: ( 920 self.expression(exp.Placeholder, this=self._prev.text) 921 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 922 else None 923 ), 924 } 925 926 RANGE_PARSERS = { 927 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 928 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 929 TokenType.GLOB: binary_range_parser(exp.Glob), 930 TokenType.ILIKE: binary_range_parser(exp.ILike), 931 TokenType.IN: lambda self, this: self._parse_in(this), 932 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 933 TokenType.IS: lambda self, this: self._parse_is(this), 934 TokenType.LIKE: binary_range_parser(exp.Like), 935 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 936 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 937 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 938 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 939 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 940 } 941 942 PIPE_SYNTAX_TRANSFORM_PARSERS = { 943 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 944 "AS": lambda self, query: self._build_pipe_cte( 945 query, [exp.Star()], self._parse_table_alias() 946 ), 947 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 948 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 949 "ORDER BY": lambda self, query: query.order_by( 950 self._parse_order(), append=False, copy=False 951 ), 952 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 953 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 954 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 955 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 956 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 957 } 958 959 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 960 "ALLOWED_VALUES": lambda self: self.expression( 961 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 962 ), 963 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 964 "AUTO": lambda self: self._parse_auto_property(), 965 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 966 "BACKUP": lambda self: self.expression( 967 exp.BackupProperty, this=self._parse_var(any_token=True) 968 ), 969 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 970 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 971 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 972 "CHECKSUM": lambda self: self._parse_checksum(), 973 "CLUSTER BY": lambda self: self._parse_cluster(), 974 "CLUSTERED": lambda self: self._parse_clustered_by(), 975 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 976 exp.CollateProperty, **kwargs 977 ), 978 "COMMENT": lambda self: 
self._parse_property_assignment(exp.SchemaCommentProperty), 979 "CONTAINS": lambda self: self._parse_contains_property(), 980 "COPY": lambda self: self._parse_copy_property(), 981 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 982 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 983 "DEFINER": lambda self: self._parse_definer(), 984 "DETERMINISTIC": lambda self: self.expression( 985 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 986 ), 987 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 988 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 989 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 990 "DISTKEY": lambda self: self._parse_distkey(), 991 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 992 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 993 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 994 "ENVIRONMENT": lambda self: self.expression( 995 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 996 ), 997 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 998 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 999 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1000 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1001 "FREESPACE": lambda self: self._parse_freespace(), 1002 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1003 "HEAP": lambda self: self.expression(exp.HeapProperty), 1004 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1005 "IMMUTABLE": lambda self: self.expression( 1006 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1007 ), 1008 "INHERITS": lambda self: self.expression( 1009 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1010 ), 1011 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1012 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1013 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1014 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1015 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1016 "LIKE": lambda self: self._parse_create_like(), 1017 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1018 "LOCK": lambda self: self._parse_locking(), 1019 "LOCKING": lambda self: self._parse_locking(), 1020 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1021 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1022 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1023 "MODIFIES": lambda self: self._parse_modifies_property(), 1024 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1025 "NO": lambda self: self._parse_no_property(), 1026 "ON": lambda self: self._parse_on_property(), 1027 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1028 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1029 "PARTITION": lambda self: self._parse_partitioned_of(), 1030 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1031 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1032 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1033 "PRIMARY KEY": lambda self: 
self._parse_primary_key(in_props=True), 1034 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1035 "READS": lambda self: self._parse_reads_property(), 1036 "REMOTE": lambda self: self._parse_remote_with_connection(), 1037 "RETURNS": lambda self: self._parse_returns(), 1038 "STRICT": lambda self: self.expression(exp.StrictProperty), 1039 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1040 "ROW": lambda self: self._parse_row(), 1041 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1042 "SAMPLE": lambda self: self.expression( 1043 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1044 ), 1045 "SECURE": lambda self: self.expression(exp.SecureProperty), 1046 "SECURITY": lambda self: self._parse_security(), 1047 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1048 "SETTINGS": lambda self: self._parse_settings_property(), 1049 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1050 "SORTKEY": lambda self: self._parse_sortkey(), 1051 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1052 "STABLE": lambda self: self.expression( 1053 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1054 ), 1055 "STORED": lambda self: self._parse_stored(), 1056 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1057 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1058 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1059 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1060 "TO": lambda self: self._parse_to_table(), 1061 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1062 "TRANSFORM": lambda self: self.expression( 1063 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1064 ), 1065 "TTL": lambda self: self._parse_ttl(), 1066 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1067 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1068 "VOLATILE": lambda self: self._parse_volatile_property(), 1069 "WITH": lambda self: self._parse_with_property(), 1070 } 1071 1072 CONSTRAINT_PARSERS = { 1073 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1074 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1075 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1076 "CHARACTER SET": lambda self: self.expression( 1077 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1078 ), 1079 "CHECK": lambda self: self.expression( 1080 exp.CheckColumnConstraint, 1081 this=self._parse_wrapped(self._parse_assignment), 1082 enforced=self._match_text_seq("ENFORCED"), 1083 ), 1084 "COLLATE": lambda self: self.expression( 1085 exp.CollateColumnConstraint, 1086 this=self._parse_identifier() or self._parse_column(), 1087 ), 1088 "COMMENT": lambda self: self.expression( 1089 exp.CommentColumnConstraint, this=self._parse_string() 1090 ), 1091 "COMPRESS": lambda self: self._parse_compress(), 1092 "CLUSTERED": lambda self: self.expression( 1093 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1094 ), 1095 "NONCLUSTERED": lambda self: self.expression( 1096 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1097 ), 1098 "DEFAULT": lambda self: self.expression( 1099 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1100 ), 1101 "ENCODE": lambda self: 
self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1102 "EPHEMERAL": lambda self: self.expression( 1103 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1104 ), 1105 "EXCLUDE": lambda self: self.expression( 1106 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1107 ), 1108 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1109 "FORMAT": lambda self: self.expression( 1110 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1111 ), 1112 "GENERATED": lambda self: self._parse_generated_as_identity(), 1113 "IDENTITY": lambda self: self._parse_auto_increment(), 1114 "INLINE": lambda self: self._parse_inline(), 1115 "LIKE": lambda self: self._parse_create_like(), 1116 "NOT": lambda self: self._parse_not_constraint(), 1117 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1118 "ON": lambda self: ( 1119 self._match(TokenType.UPDATE) 1120 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1121 ) 1122 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1123 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1124 "PERIOD": lambda self: self._parse_period_for_system_time(), 1125 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1126 "REFERENCES": lambda self: self._parse_references(match=False), 1127 "TITLE": lambda self: self.expression( 1128 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1129 ), 1130 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1131 "UNIQUE": lambda self: self._parse_unique(), 1132 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1133 "WATERMARK": lambda self: self.expression( 1134 exp.WatermarkColumnConstraint, 1135 this=self._match(TokenType.FOR) and self._parse_column(), 1136 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1137 ), 1138 "WITH": lambda self: self.expression( 1139 exp.Properties, expressions=self._parse_wrapped_properties() 1140 ), 1141 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1142 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1143 } 1144 1145 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1146 if not self._match(TokenType.L_PAREN, advance=False): 1147 # Partitioning by bucket or truncate follows the syntax: 1148 # PARTITION BY (BUCKET(..) 
| TRUNCATE(..)) 1149 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1150 self._retreat(self._index - 1) 1151 return None 1152 1153 klass = ( 1154 exp.PartitionedByBucket 1155 if self._prev.text.upper() == "BUCKET" 1156 else exp.PartitionByTruncate 1157 ) 1158 1159 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1160 this, expression = seq_get(args, 0), seq_get(args, 1) 1161 1162 if isinstance(this, exp.Literal): 1163 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1164 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1165 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1166 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1167 # 1168 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1169 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1170 this, expression = expression, this 1171 1172 return self.expression(klass, this=this, expression=expression) 1173 1174 ALTER_PARSERS = { 1175 "ADD": lambda self: self._parse_alter_table_add(), 1176 "AS": lambda self: self._parse_select(), 1177 "ALTER": lambda self: self._parse_alter_table_alter(), 1178 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1179 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1180 "DROP": lambda self: self._parse_alter_table_drop(), 1181 "RENAME": lambda self: self._parse_alter_table_rename(), 1182 "SET": lambda self: self._parse_alter_table_set(), 1183 "SWAP": lambda self: self.expression( 1184 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1185 ), 1186 } 1187 1188 ALTER_ALTER_PARSERS = { 1189 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1190 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1191 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1192 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1193 } 1194 1195 SCHEMA_UNNAMED_CONSTRAINTS = { 1196 "CHECK", 1197 "EXCLUDE", 1198 "FOREIGN KEY", 1199 "LIKE", 1200 "PERIOD", 1201 "PRIMARY KEY", 1202 "UNIQUE", 1203 "WATERMARK", 1204 "BUCKET", 1205 "TRUNCATE", 1206 } 1207 1208 NO_PAREN_FUNCTION_PARSERS = { 1209 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1210 "CASE": lambda self: self._parse_case(), 1211 "CONNECT_BY_ROOT": lambda self: self.expression( 1212 exp.ConnectByRoot, this=self._parse_column() 1213 ), 1214 "IF": lambda self: self._parse_if(), 1215 } 1216 1217 INVALID_FUNC_NAME_TOKENS = { 1218 TokenType.IDENTIFIER, 1219 TokenType.STRING, 1220 } 1221 1222 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1223 1224 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1225 1226 FUNCTION_PARSERS = { 1227 **{ 1228 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1229 }, 1230 **{ 1231 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1232 }, 1233 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1234 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1235 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1236 "DECODE": lambda self: self._parse_decode(), 1237 "EXTRACT": lambda self: self._parse_extract(), 1238 
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1239 "GAP_FILL": lambda self: self._parse_gap_fill(), 1240 "JSON_OBJECT": lambda self: self._parse_json_object(), 1241 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1242 "JSON_TABLE": lambda self: self._parse_json_table(), 1243 "MATCH": lambda self: self._parse_match_against(), 1244 "NORMALIZE": lambda self: self._parse_normalize(), 1245 "OPENJSON": lambda self: self._parse_open_json(), 1246 "OVERLAY": lambda self: self._parse_overlay(), 1247 "POSITION": lambda self: self._parse_position(), 1248 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1249 "STRING_AGG": lambda self: self._parse_string_agg(), 1250 "SUBSTRING": lambda self: self._parse_substring(), 1251 "TRIM": lambda self: self._parse_trim(), 1252 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1253 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1254 "XMLELEMENT": lambda self: self.expression( 1255 exp.XMLElement, 1256 this=self._match_text_seq("NAME") and self._parse_id_var(), 1257 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1258 ), 1259 "XMLTABLE": lambda self: self._parse_xml_table(), 1260 } 1261 1262 QUERY_MODIFIER_PARSERS = { 1263 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1264 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1265 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1266 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1267 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1268 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1269 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1270 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1271 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1272 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1273 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1274 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1275 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1276 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1277 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1278 TokenType.CLUSTER_BY: lambda self: ( 1279 "cluster", 1280 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1281 ), 1282 TokenType.DISTRIBUTE_BY: lambda self: ( 1283 "distribute", 1284 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1285 ), 1286 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1287 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1288 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1289 } 1290 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1291 1292 SET_PARSERS = { 1293 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1294 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1295 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1296 "TRANSACTION": lambda self: self._parse_set_transaction(), 1297 } 1298 1299 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1300 1301 TYPE_LITERAL_PARSERS = { 1302 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1303 } 1304 1305 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 
t.Callable[[exp.DataType], exp.DataType]] = {} 1306 1307 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1308 1309 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1310 1311 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1312 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1313 "ISOLATION": ( 1314 ("LEVEL", "REPEATABLE", "READ"), 1315 ("LEVEL", "READ", "COMMITTED"), 1316 ("LEVEL", "READ", "UNCOMITTED"), 1317 ("LEVEL", "SERIALIZABLE"), 1318 ), 1319 "READ": ("WRITE", "ONLY"), 1320 } 1321 1322 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1323 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1324 ) 1325 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1326 1327 CREATE_SEQUENCE: OPTIONS_TYPE = { 1328 "SCALE": ("EXTEND", "NOEXTEND"), 1329 "SHARD": ("EXTEND", "NOEXTEND"), 1330 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1331 **dict.fromkeys( 1332 ( 1333 "SESSION", 1334 "GLOBAL", 1335 "KEEP", 1336 "NOKEEP", 1337 "ORDER", 1338 "NOORDER", 1339 "NOCACHE", 1340 "CYCLE", 1341 "NOCYCLE", 1342 "NOMINVALUE", 1343 "NOMAXVALUE", 1344 "NOSCALE", 1345 "NOSHARD", 1346 ), 1347 tuple(), 1348 ), 1349 } 1350 1351 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1352 1353 USABLES: OPTIONS_TYPE = dict.fromkeys( 1354 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1355 ) 1356 1357 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1358 1359 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1360 "TYPE": ("EVOLUTION",), 1361 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1362 } 1363 1364 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1365 1366 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1367 1368 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1369 "NOT": ("ENFORCED",), 1370 "MATCH": ( 1371 "FULL", 1372 "PARTIAL", 1373 "SIMPLE", 1374 ), 1375 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1376 "USING": ( 1377 "BTREE", 1378 "HASH", 1379 ), 1380 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1381 } 1382 1383 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1384 "NO": ("OTHERS",), 1385 "CURRENT": ("ROW",), 1386 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1387 } 1388 1389 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1390 1391 CLONE_KEYWORDS = {"CLONE", "COPY"} 1392 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1393 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1394 1395 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1396 1397 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1398 1399 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1400 1401 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1402 1403 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1404 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1405 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1406 1407 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1408 1409 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1410 1411 ADD_CONSTRAINT_TOKENS = { 1412 TokenType.CONSTRAINT, 1413 TokenType.FOREIGN_KEY, 1414 TokenType.INDEX, 1415 TokenType.KEY, 1416 TokenType.PRIMARY_KEY, 1417 TokenType.UNIQUE, 1418 } 1419 1420 DISTINCT_TOKENS = {TokenType.DISTINCT} 1421 1422 NULL_TOKENS = {TokenType.NULL} 1423 1424 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1425 1426 
SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1427 1428 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1429 1430 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1431 1432 ODBC_DATETIME_LITERALS = { 1433 "d": exp.Date, 1434 "t": exp.Time, 1435 "ts": exp.Timestamp, 1436 } 1437 1438 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1439 1440 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1441 1442 # The style options for the DESCRIBE statement 1443 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1444 1445 # The style options for the ANALYZE statement 1446 ANALYZE_STYLES = { 1447 "BUFFER_USAGE_LIMIT", 1448 "FULL", 1449 "LOCAL", 1450 "NO_WRITE_TO_BINLOG", 1451 "SAMPLE", 1452 "SKIP_LOCKED", 1453 "VERBOSE", 1454 } 1455 1456 ANALYZE_EXPRESSION_PARSERS = { 1457 "ALL": lambda self: self._parse_analyze_columns(), 1458 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1459 "DELETE": lambda self: self._parse_analyze_delete(), 1460 "DROP": lambda self: self._parse_analyze_histogram(), 1461 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1462 "LIST": lambda self: self._parse_analyze_list(), 1463 "PREDICATE": lambda self: self._parse_analyze_columns(), 1464 "UPDATE": lambda self: self._parse_analyze_histogram(), 1465 "VALIDATE": lambda self: self._parse_analyze_validate(), 1466 } 1467 1468 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1469 1470 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1471 1472 OPERATION_MODIFIERS: t.Set[str] = set() 1473 1474 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1475 1476 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1477 1478 STRICT_CAST = True 1479 1480 PREFIXED_PIVOT_COLUMNS = False 1481 IDENTIFY_PIVOT_STRINGS = False 1482 1483 LOG_DEFAULTS_TO_LN = False 1484 1485 # Whether the table sample clause expects CSV syntax 1486 TABLESAMPLE_CSV = False 1487 1488 # The default method used for table sampling 1489 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1490 1491 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1492 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1493 1494 # Whether the TRIM function expects the characters to trim as its first argument 1495 TRIM_PATTERN_FIRST = False 1496 1497 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1498 STRING_ALIASES = False 1499 1500 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1501 MODIFIERS_ATTACHED_TO_SET_OP = True 1502 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1503 1504 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1505 NO_PAREN_IF_COMMANDS = True 1506 1507 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1508 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1509 1510 # Whether the `:` operator is used to extract a value from a VARIANT column 1511 COLON_IS_VARIANT_EXTRACT = False 1512 1513 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1514 # If this is True and '(' is not found, the keyword will be treated as an identifier 1515 VALUES_FOLLOWED_BY_PAREN = True 1516 1517 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1518 SUPPORTS_IMPLICIT_UNNEST = False 1519 1520 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1521 INTERVAL_SPANS = True 1522 1523 # Whether a PARTITION clause can follow a table reference 1524 SUPPORTS_PARTITION_SELECTION = False 1525 1526 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1527 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1528 1529 # Whether the 'AS' keyword is optional in the CTE definition syntax 1530 OPTIONAL_ALIAS_TOKEN_CTE = True 1531 1532 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1533 ALTER_RENAME_REQUIRES_COLUMN = True 1534 1535 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1536 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1537 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1538 # as BigQuery, where all joins have the same precedence. 1539 JOINS_HAVE_EQUAL_PRECEDENCE = False 1540 1541 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1542 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1543 1544 # Whether map literals support arbitrary expressions as keys. 1545 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1546 # When False, keys are typically restricted to identifiers. 1547 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1548 1549 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this 1550 # is true for Snowflake but not for BigQuery which can also process strings 1551 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False 1552 1553 __slots__ = ( 1554 "error_level", 1555 "error_message_context", 1556 "max_errors", 1557 "dialect", 1558 "sql", 1559 "errors", 1560 "_tokens", 1561 "_index", 1562 "_curr", 1563 "_next", 1564 "_prev", 1565 "_prev_comments", 1566 "_pipe_cte_counter", 1567 ) 1568 1569 # Autofilled 1570 SHOW_TRIE: t.Dict = {} 1571 SET_TRIE: t.Dict = {} 1572 1573 def __init__( 1574 self, 1575 error_level: t.Optional[ErrorLevel] = None, 1576 error_message_context: int = 100, 1577 max_errors: int = 3, 1578 dialect: DialectType = None, 1579 ): 1580 from sqlglot.dialects import Dialect 1581 1582 self.error_level = error_level or ErrorLevel.IMMEDIATE 1583 self.error_message_context = error_message_context 1584 self.max_errors = max_errors 1585 self.dialect = Dialect.get_or_raise(dialect) 1586 self.reset() 1587 1588 def reset(self): 1589 self.sql = "" 1590 self.errors = [] 1591 self._tokens = [] 1592 self._index = 0 1593 self._curr = None 1594 self._next = None 1595 self._prev = None 1596 self._prev_comments = None 1597 self._pipe_cte_counter = 0 1598 1599 def parse( 1600 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1601 ) -> t.List[t.Optional[exp.Expression]]: 1602 """ 1603 Parses a list of tokens and returns a list of syntax trees, one tree 1604 per parsed SQL statement. 1605 1606 Args: 1607 raw_tokens: The list of tokens. 1608 sql: The original SQL string, used to produce helpful debug messages. 1609 1610 Returns: 1611 The list of the produced syntax trees. 
1612 """ 1613 return self._parse( 1614 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1615 ) 1616 1617 def parse_into( 1618 self, 1619 expression_types: exp.IntoType, 1620 raw_tokens: t.List[Token], 1621 sql: t.Optional[str] = None, 1622 ) -> t.List[t.Optional[exp.Expression]]: 1623 """ 1624 Parses a list of tokens into a given Expression type. If a collection of Expression 1625 types is given instead, this method will try to parse the token list into each one 1626 of them, stopping at the first for which the parsing succeeds. 1627 1628 Args: 1629 expression_types: The expression type(s) to try and parse the token list into. 1630 raw_tokens: The list of tokens. 1631 sql: The original SQL string, used to produce helpful debug messages. 1632 1633 Returns: 1634 The list of syntax trees produced by parsing into the target expression type. 1635 """ 1636 errors = [] 1637 for expression_type in ensure_list(expression_types): 1638 parser = self.EXPRESSION_PARSERS.get(expression_type) 1639 if not parser: 1640 raise TypeError(f"No parser registered for {expression_type}") 1641 1642 try: 1643 return self._parse(parser, raw_tokens, sql) 1644 except ParseError as e: 1645 e.errors[0]["into_expression"] = expression_type 1646 errors.append(e) 1647 1648 raise ParseError( 1649 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1650 errors=merge_errors(errors), 1651 ) from errors[-1] 1652 1653 def _parse( 1654 self, 1655 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1656 raw_tokens: t.List[Token], 1657 sql: t.Optional[str] = None, 1658 ) -> t.List[t.Optional[exp.Expression]]: 1659 self.reset() 1660 self.sql = sql or "" 1661 1662 total = len(raw_tokens) 1663 chunks: t.List[t.List[Token]] = [[]] 1664 1665 for i, token in enumerate(raw_tokens): 1666 if token.token_type == TokenType.SEMICOLON: 1667 if token.comments: 1668 chunks.append([token]) 1669 1670 if i < total - 1: 1671 chunks.append([]) 1672 else: 1673 chunks[-1].append(token) 1674 1675 expressions = [] 1676 1677 for tokens in chunks: 1678 self._index = -1 1679 self._tokens = tokens 1680 self._advance() 1681 1682 expressions.append(parse_method(self)) 1683 1684 if self._index < len(self._tokens): 1685 self.raise_error("Invalid expression / Unexpected token") 1686 1687 self.check_errors() 1688 1689 return expressions 1690 1691 def check_errors(self) -> None: 1692 """Logs or raises any found errors, depending on the chosen error level setting.""" 1693 if self.error_level == ErrorLevel.WARN: 1694 for error in self.errors: 1695 logger.error(str(error)) 1696 elif self.error_level == ErrorLevel.RAISE and self.errors: 1697 raise ParseError( 1698 concat_messages(self.errors, self.max_errors), 1699 errors=merge_errors(self.errors), 1700 ) 1701 1702 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1703 """ 1704 Appends an error to the list of recorded errors or raises it, depending on the chosen 1705 error level setting. 1706 """ 1707 token = token or self._curr or self._prev or Token.string("") 1708 start = token.start 1709 end = token.end + 1 1710 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1711 highlight = self.sql[start:end] 1712 end_context = self.sql[end : end + self.error_message_context] 1713 1714 error = ParseError.new( 1715 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 1716 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1717 description=message, 1718 line=token.line, 1719 col=token.col, 1720 start_context=start_context, 1721 highlight=highlight, 1722 end_context=end_context, 1723 ) 1724 1725 if self.error_level == ErrorLevel.IMMEDIATE: 1726 raise error 1727 1728 self.errors.append(error) 1729 1730 def expression( 1731 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1732 ) -> E: 1733 """ 1734 Creates a new, validated Expression. 1735 1736 Args: 1737 exp_class: The expression class to instantiate. 1738 comments: An optional list of comments to attach to the expression. 1739 kwargs: The arguments to set for the expression along with their respective values. 1740 1741 Returns: 1742 The target expression. 1743 """ 1744 instance = exp_class(**kwargs) 1745 instance.add_comments(comments) if comments else self._add_comments(instance) 1746 return self.validate_expression(instance) 1747 1748 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1749 if expression and self._prev_comments: 1750 expression.add_comments(self._prev_comments) 1751 self._prev_comments = None 1752 1753 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1754 """ 1755 Validates an Expression, making sure that all its mandatory arguments are set. 1756 1757 Args: 1758 expression: The expression to validate. 1759 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1760 1761 Returns: 1762 The validated expression. 1763 """ 1764 if self.error_level != ErrorLevel.IGNORE: 1765 for error_message in expression.error_messages(args): 1766 self.raise_error(error_message) 1767 1768 return expression 1769 1770 def _find_sql(self, start: Token, end: Token) -> str: 1771 return self.sql[start.start : end.end + 1] 1772 1773 def _is_connected(self) -> bool: 1774 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1775 1776 def _advance(self, times: int = 1) -> None: 1777 self._index += times 1778 self._curr = seq_get(self._tokens, self._index) 1779 self._next = seq_get(self._tokens, self._index + 1) 1780 1781 if self._index > 0: 1782 self._prev = self._tokens[self._index - 1] 1783 self._prev_comments = self._prev.comments 1784 else: 1785 self._prev = None 1786 self._prev_comments = None 1787 1788 def _retreat(self, index: int) -> None: 1789 if index != self._index: 1790 self._advance(index - self._index) 1791 1792 def _warn_unsupported(self) -> None: 1793 if len(self._tokens) <= 1: 1794 return 1795 1796 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1797 # interested in emitting a warning for the one being currently processed. 1798 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1799 1800 logger.warning( 1801 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1802 ) 1803 1804 def _parse_command(self) -> exp.Command: 1805 self._warn_unsupported() 1806 return self.expression( 1807 exp.Command, 1808 comments=self._prev_comments, 1809 this=self._prev.text.upper(), 1810 expression=self._parse_string(), 1811 ) 1812 1813 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1814 """ 1815 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1816 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1817 solve this by setting & resetting the parser state accordingly. 1818 """ 1819 index = self._index 1820 error_level = self.error_level 1821 1822 self.error_level = ErrorLevel.IMMEDIATE 1823 try: 1824 this = parse_method() 1825 except ParseError: 1826 this = None 1827 finally: 1828 if not this or retreat: 1829 self._retreat(index) 1830 self.error_level = error_level 1831 1832 return this 1833 1834 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1835 start = self._prev 1836 exists = self._parse_exists() if allow_exists else None 1837 1838 self._match(TokenType.ON) 1839 1840 materialized = self._match_text_seq("MATERIALIZED") 1841 kind = self._match_set(self.CREATABLES) and self._prev 1842 if not kind: 1843 return self._parse_as_command(start) 1844 1845 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1846 this = self._parse_user_defined_function(kind=kind.token_type) 1847 elif kind.token_type == TokenType.TABLE: 1848 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1849 elif kind.token_type == TokenType.COLUMN: 1850 this = self._parse_column() 1851 else: 1852 this = self._parse_id_var() 1853 1854 self._match(TokenType.IS) 1855 1856 return self.expression( 1857 exp.Comment, 1858 this=this, 1859 kind=kind.text, 1860 expression=self._parse_string(), 1861 exists=exists, 1862 materialized=materialized, 1863 ) 1864 1865 def _parse_to_table( 1866 self, 1867 ) -> exp.ToTableProperty: 1868 table = self._parse_table_parts(schema=True) 1869 return self.expression(exp.ToTableProperty, this=table) 1870 1871 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1872 def _parse_ttl(self) -> exp.Expression: 1873 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1874 this = self._parse_bitwise() 1875 1876 if self._match_text_seq("DELETE"): 1877 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1878 if self._match_text_seq("RECOMPRESS"): 1879 return self.expression( 1880 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1881 ) 1882 if self._match_text_seq("TO", "DISK"): 1883 return self.expression( 1884 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1885 ) 1886 if self._match_text_seq("TO", "VOLUME"): 1887 return self.expression( 1888 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1889 ) 1890 1891 return this 1892 1893 expressions = self._parse_csv(_parse_ttl_action) 1894 where = self._parse_where() 1895 group = self._parse_group() 1896 1897 aggregates = None 1898 if group and self._match(TokenType.SET): 1899 aggregates = self._parse_csv(self._parse_set_item) 1900 1901 return self.expression( 1902 exp.MergeTreeTTL, 1903 expressions=expressions, 1904 where=where, 1905 group=group, 1906 aggregates=aggregates, 1907 ) 1908 1909 def _parse_statement(self) -> t.Optional[exp.Expression]: 1910 if self._curr is None: 1911 return None 1912 1913 if self._match_set(self.STATEMENT_PARSERS): 1914 comments = self._prev_comments 1915 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1916 stmt.add_comments(comments, prepend=True) 1917 return stmt 1918 1919 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1920 return self._parse_command() 1921 1922 expression = self._parse_expression() 1923 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1924 return
self._parse_query_modifiers(expression) 1925 1926 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1927 start = self._prev 1928 temporary = self._match(TokenType.TEMPORARY) 1929 materialized = self._match_text_seq("MATERIALIZED") 1930 1931 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1932 if not kind: 1933 return self._parse_as_command(start) 1934 1935 concurrently = self._match_text_seq("CONCURRENTLY") 1936 if_exists = exists or self._parse_exists() 1937 1938 if kind == "COLUMN": 1939 this = self._parse_column() 1940 else: 1941 this = self._parse_table_parts( 1942 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1943 ) 1944 1945 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1946 1947 if self._match(TokenType.L_PAREN, advance=False): 1948 expressions = self._parse_wrapped_csv(self._parse_types) 1949 else: 1950 expressions = None 1951 1952 return self.expression( 1953 exp.Drop, 1954 exists=if_exists, 1955 this=this, 1956 expressions=expressions, 1957 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1958 temporary=temporary, 1959 materialized=materialized, 1960 cascade=self._match_text_seq("CASCADE"), 1961 constraints=self._match_text_seq("CONSTRAINTS"), 1962 purge=self._match_text_seq("PURGE"), 1963 cluster=cluster, 1964 concurrently=concurrently, 1965 ) 1966 1967 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1968 return ( 1969 self._match_text_seq("IF") 1970 and (not not_ or self._match(TokenType.NOT)) 1971 and self._match(TokenType.EXISTS) 1972 ) 1973 1974 def _parse_create(self) -> exp.Create | exp.Command: 1975 # Note: this can't be None because we've matched a statement parser 1976 start = self._prev 1977 1978 replace = ( 1979 start.token_type == TokenType.REPLACE 1980 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1981 or self._match_pair(TokenType.OR, TokenType.ALTER) 1982 ) 1983 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1984 1985 unique = self._match(TokenType.UNIQUE) 1986 1987 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1988 clustered = True 1989 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1990 "COLUMNSTORE" 1991 ): 1992 clustered = False 1993 else: 1994 clustered = None 1995 1996 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1997 self._advance() 1998 1999 properties = None 2000 create_token = self._match_set(self.CREATABLES) and self._prev 2001 2002 if not create_token: 2003 # exp.Properties.Location.POST_CREATE 2004 properties = self._parse_properties() 2005 create_token = self._match_set(self.CREATABLES) and self._prev 2006 2007 if not properties or not create_token: 2008 return self._parse_as_command(start) 2009 2010 concurrently = self._match_text_seq("CONCURRENTLY") 2011 exists = self._parse_exists(not_=True) 2012 this = None 2013 expression: t.Optional[exp.Expression] = None 2014 indexes = None 2015 no_schema_binding = None 2016 begin = None 2017 end = None 2018 clone = None 2019 2020 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2021 nonlocal properties 2022 if properties and temp_props: 2023 properties.expressions.extend(temp_props.expressions) 2024 elif temp_props: 2025 properties = temp_props 2026 2027 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2028 this = self._parse_user_defined_function(kind=create_token.token_type) 2029 2030 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2031 extend_props(self._parse_properties()) 2032 2033 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2034 extend_props(self._parse_properties()) 2035 2036 if not expression: 2037 if self._match(TokenType.COMMAND): 2038 expression = self._parse_as_command(self._prev) 2039 else: 2040 begin = self._match(TokenType.BEGIN) 2041 return_ = self._match_text_seq("RETURN") 2042 2043 if self._match(TokenType.STRING, advance=False): 2044 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2045 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2046 expression = self._parse_string() 2047 extend_props(self._parse_properties()) 2048 else: 2049 expression = self._parse_user_defined_function_expression() 2050 2051 end = self._match_text_seq("END") 2052 2053 if return_: 2054 expression = self.expression(exp.Return, this=expression) 2055 elif create_token.token_type == TokenType.INDEX: 2056 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2057 if not self._match(TokenType.ON): 2058 index = self._parse_id_var() 2059 anonymous = False 2060 else: 2061 index = None 2062 anonymous = True 2063 2064 this = self._parse_index(index=index, anonymous=anonymous) 2065 elif create_token.token_type in self.DB_CREATABLES: 2066 table_parts = self._parse_table_parts( 2067 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2068 ) 2069 2070 # exp.Properties.Location.POST_NAME 2071 self._match(TokenType.COMMA) 2072 extend_props(self._parse_properties(before=True)) 2073 2074 this = self._parse_schema(this=table_parts) 2075 2076 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2077 extend_props(self._parse_properties()) 2078 2079 has_alias = self._match(TokenType.ALIAS) 2080 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2081 # exp.Properties.Location.POST_ALIAS 2082 extend_props(self._parse_properties()) 2083 2084 if create_token.token_type == TokenType.SEQUENCE: 2085 expression = self._parse_types() 2086 props = self._parse_properties() 2087 if props: 2088 sequence_props = exp.SequenceProperties() 2089 options = [] 2090 for prop in props: 2091 if isinstance(prop, exp.SequenceProperties): 2092 for arg, value in prop.args.items(): 2093 if arg == "options": 2094 options.extend(value) 2095 else: 2096 sequence_props.set(arg, value) 2097 prop.pop() 2098 2099 if options: 2100 sequence_props.set("options", options) 2101 2102 props.append("expressions", sequence_props) 2103 extend_props(props) 2104 else: 2105 expression = self._parse_ddl_select() 2106 2107 # Some dialects also support using a table as an alias instead of a SELECT. 2108 # Here we fall back to this as an alternative.
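            # (illustrative) e.g. "CREATE TABLE t1 AS t2" in dialects that accept a
            # bare table here; the part after AS then parses as table parts, not a query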
2109 if not expression and has_alias: 2110 expression = self._try_parse(self._parse_table_parts) 2111 2112 if create_token.token_type == TokenType.TABLE: 2113 # exp.Properties.Location.POST_EXPRESSION 2114 extend_props(self._parse_properties()) 2115 2116 indexes = [] 2117 while True: 2118 index = self._parse_index() 2119 2120 # exp.Properties.Location.POST_INDEX 2121 extend_props(self._parse_properties()) 2122 if not index: 2123 break 2124 else: 2125 self._match(TokenType.COMMA) 2126 indexes.append(index) 2127 elif create_token.token_type == TokenType.VIEW: 2128 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2129 no_schema_binding = True 2130 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2131 extend_props(self._parse_properties()) 2132 2133 shallow = self._match_text_seq("SHALLOW") 2134 2135 if self._match_texts(self.CLONE_KEYWORDS): 2136 copy = self._prev.text.lower() == "copy" 2137 clone = self.expression( 2138 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2139 ) 2140 2141 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2142 return self._parse_as_command(start) 2143 2144 create_kind_text = create_token.text.upper() 2145 return self.expression( 2146 exp.Create, 2147 this=this, 2148 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2149 replace=replace, 2150 refresh=refresh, 2151 unique=unique, 2152 expression=expression, 2153 exists=exists, 2154 properties=properties, 2155 indexes=indexes, 2156 no_schema_binding=no_schema_binding, 2157 begin=begin, 2158 end=end, 2159 clone=clone, 2160 concurrently=concurrently, 2161 clustered=clustered, 2162 ) 2163 2164 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2165 seq = exp.SequenceProperties() 2166 2167 options = [] 2168 index = self._index 2169 2170 while self._curr: 2171 self._match(TokenType.COMMA) 2172 if self._match_text_seq("INCREMENT"): 2173 self._match_text_seq("BY") 2174 self._match_text_seq("=") 2175 seq.set("increment", self._parse_term()) 2176 elif self._match_text_seq("MINVALUE"): 2177 seq.set("minvalue", self._parse_term()) 2178 elif self._match_text_seq("MAXVALUE"): 2179 seq.set("maxvalue", self._parse_term()) 2180 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2181 self._match_text_seq("=") 2182 seq.set("start", self._parse_term()) 2183 elif self._match_text_seq("CACHE"): 2184 # T-SQL allows empty CACHE which is initialized dynamically 2185 seq.set("cache", self._parse_number() or True) 2186 elif self._match_text_seq("OWNED", "BY"): 2187 # "OWNED BY NONE" is the default 2188 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2189 else: 2190 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2191 if opt: 2192 options.append(opt) 2193 else: 2194 break 2195 2196 seq.set("options", options if options else None) 2197 return None if self._index == index else seq 2198 2199 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2200 # only used for teradata currently 2201 self._match(TokenType.COMMA) 2202 2203 kwargs = { 2204 "no": self._match_text_seq("NO"), 2205 "dual": self._match_text_seq("DUAL"), 2206 "before": self._match_text_seq("BEFORE"), 2207 "default": self._match_text_seq("DEFAULT"), 2208 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2209 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2210 "after": self._match_text_seq("AFTER"), 2211 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2212 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2213 } 2214 2215 if self._match_texts(self.PROPERTY_PARSERS): 2216 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2217 try: 2218 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2219 except TypeError: 2220 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2221 2222 return None 2223 2224 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2225 return self._parse_wrapped_csv(self._parse_property) 2226 2227 def _parse_property(self) -> t.Optional[exp.Expression]: 2228 if self._match_texts(self.PROPERTY_PARSERS): 2229 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2230 2231 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2232 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2233 2234 if self._match_text_seq("COMPOUND", "SORTKEY"): 2235 return self._parse_sortkey(compound=True) 2236 2237 if self._match_text_seq("SQL", "SECURITY"): 2238 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2239 2240 index = self._index 2241 2242 seq_props = self._parse_sequence_properties() 2243 if seq_props: 2244 return seq_props 2245 2246 self._retreat(index) 2247 key = self._parse_column() 2248 2249 if not self._match(TokenType.EQ): 2250 self._retreat(index) 2251 return None 2252 2253 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2254 if isinstance(key, exp.Column): 2255 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2256 2257 value = self._parse_bitwise() or self._parse_var(any_token=True) 2258 2259 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2260 if isinstance(value, exp.Column): 2261 value = exp.var(value.name) 2262 2263 return self.expression(exp.Property, this=key, value=value) 2264 2265 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2266 if self._match_text_seq("BY"): 2267 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2268 2269 self._match(TokenType.ALIAS) 2270 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2271 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2272 2273 return self.expression( 2274 exp.FileFormatProperty, 2275 this=( 2276 self.expression( 2277 exp.InputOutputFormat, 2278 input_format=input_format, 2279 output_format=output_format, 2280 ) 2281 if input_format or output_format 2282 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2283 ), 2284 hive_format=True, 2285 ) 2286 2287 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2288 field = self._parse_field() 2289 if isinstance(field, exp.Identifier) and not field.quoted: 2290 field = exp.var(field) 2291 2292 return field 2293 2294 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2295 self._match(TokenType.EQ) 2296 self._match(TokenType.ALIAS) 2297 2298 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2299 2300 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2301 properties = [] 2302 while True: 2303 if before: 2304 prop = self._parse_property_before() 2305 else: 2306 prop = self._parse_property() 2307 if not prop: 2308 break 2309 for p in ensure_list(prop): 2310 properties.append(p) 
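        # (illustrative) e.g. a key/value pair like "fillfactor = 70" comes back from
        # _parse_property above as roughly exp.Property(this=exp.var("fillfactor"),
        # value=exp.Literal.number(70)), one node per parsed property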
2311 2312 if properties: 2313 return self.expression(exp.Properties, expressions=properties) 2314 2315 return None 2316 2317 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2318 return self.expression( 2319 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2320 ) 2321 2322 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2323 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2324 security_specifier = self._prev.text.upper() 2325 return self.expression(exp.SecurityProperty, this=security_specifier) 2326 return None 2327 2328 def _parse_settings_property(self) -> exp.SettingsProperty: 2329 return self.expression( 2330 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2331 ) 2332 2333 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2334 if self._index >= 2: 2335 pre_volatile_token = self._tokens[self._index - 2] 2336 else: 2337 pre_volatile_token = None 2338 2339 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2340 return exp.VolatileProperty() 2341 2342 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2343 2344 def _parse_retention_period(self) -> exp.Var: 2345 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2346 number = self._parse_number() 2347 number_str = f"{number} " if number else "" 2348 unit = self._parse_var(any_token=True) 2349 return exp.var(f"{number_str}{unit}") 2350 2351 def _parse_system_versioning_property( 2352 self, with_: bool = False 2353 ) -> exp.WithSystemVersioningProperty: 2354 self._match(TokenType.EQ) 2355 prop = self.expression( 2356 exp.WithSystemVersioningProperty, 2357 **{ # type: ignore 2358 "on": True, 2359 "with": with_, 2360 }, 2361 ) 2362 2363 if self._match_text_seq("OFF"): 2364 prop.set("on", False) 2365 return prop 2366 2367 self._match(TokenType.ON) 2368 if self._match(TokenType.L_PAREN): 2369 while self._curr and not self._match(TokenType.R_PAREN): 2370 if self._match_text_seq("HISTORY_TABLE", "="): 2371 prop.set("this", self._parse_table_parts()) 2372 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2373 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2374 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2375 prop.set("retention_period", self._parse_retention_period()) 2376 2377 self._match(TokenType.COMMA) 2378 2379 return prop 2380 2381 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2382 self._match(TokenType.EQ) 2383 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2384 prop = self.expression(exp.DataDeletionProperty, on=on) 2385 2386 if self._match(TokenType.L_PAREN): 2387 while self._curr and not self._match(TokenType.R_PAREN): 2388 if self._match_text_seq("FILTER_COLUMN", "="): 2389 prop.set("filter_column", self._parse_column()) 2390 elif self._match_text_seq("RETENTION_PERIOD", "="): 2391 prop.set("retention_period", self._parse_retention_period()) 2392 2393 self._match(TokenType.COMMA) 2394 2395 return prop 2396 2397 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2398 kind = "HASH" 2399 expressions: t.Optional[t.List[exp.Expression]] = None 2400 if self._match_text_seq("BY", "HASH"): 2401 expressions = self._parse_wrapped_csv(self._parse_id_var) 2402 elif self._match_text_seq("BY", "RANDOM"): 2403 kind = "RANDOM" 2404 2405 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2406 
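        # (illustrative) e.g. Doris/StarRocks: DISTRIBUTED BY HASH(k1) BUCKETS 32,
        # or DISTRIBUTED BY RANDOM BUCKETS AUTO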
buckets: t.Optional[exp.Expression] = None 2407 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2408 buckets = self._parse_number() 2409 2410 return self.expression( 2411 exp.DistributedByProperty, 2412 expressions=expressions, 2413 kind=kind, 2414 buckets=buckets, 2415 order=self._parse_order(), 2416 ) 2417 2418 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2419 self._match_text_seq("KEY") 2420 expressions = self._parse_wrapped_id_vars() 2421 return self.expression(expr_type, expressions=expressions) 2422 2423 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2424 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2425 prop = self._parse_system_versioning_property(with_=True) 2426 self._match_r_paren() 2427 return prop 2428 2429 if self._match(TokenType.L_PAREN, advance=False): 2430 return self._parse_wrapped_properties() 2431 2432 if self._match_text_seq("JOURNAL"): 2433 return self._parse_withjournaltable() 2434 2435 if self._match_texts(self.VIEW_ATTRIBUTES): 2436 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2437 2438 if self._match_text_seq("DATA"): 2439 return self._parse_withdata(no=False) 2440 elif self._match_text_seq("NO", "DATA"): 2441 return self._parse_withdata(no=True) 2442 2443 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2444 return self._parse_serde_properties(with_=True) 2445 2446 if self._match(TokenType.SCHEMA): 2447 return self.expression( 2448 exp.WithSchemaBindingProperty, 2449 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2450 ) 2451 2452 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2453 return self.expression( 2454 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2455 ) 2456 2457 if not self._next: 2458 return None 2459 2460 return self._parse_withisolatedloading() 2461 2462 def _parse_procedure_option(self) -> exp.Expression | None: 2463 if self._match_text_seq("EXECUTE", "AS"): 2464 return self.expression( 2465 exp.ExecuteAsProperty, 2466 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2467 or self._parse_string(), 2468 ) 2469 2470 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2471 2472 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2473 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2474 self._match(TokenType.EQ) 2475 2476 user = self._parse_id_var() 2477 self._match(TokenType.PARAMETER) 2478 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2479 2480 if not user or not host: 2481 return None 2482 2483 return exp.DefinerProperty(this=f"{user}@{host}") 2484 2485 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2486 self._match(TokenType.TABLE) 2487 self._match(TokenType.EQ) 2488 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2489 2490 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2491 return self.expression(exp.LogProperty, no=no) 2492 2493 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2494 return self.expression(exp.JournalProperty, **kwargs) 2495 2496 def _parse_checksum(self) -> exp.ChecksumProperty: 2497 self._match(TokenType.EQ) 2498 2499 on = None 2500 if self._match(TokenType.ON): 2501 on = True 2502 elif self._match_text_seq("OFF"): 2503 on = False 2504 2505 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2506 2507 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2508 return self.expression( 2509 exp.Cluster, 2510 expressions=( 2511 self._parse_wrapped_csv(self._parse_ordered) 2512 if wrapped 2513 else self._parse_csv(self._parse_ordered) 2514 ), 2515 ) 2516 2517 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2518 self._match_text_seq("BY") 2519 2520 self._match_l_paren() 2521 expressions = self._parse_csv(self._parse_column) 2522 self._match_r_paren() 2523 2524 if self._match_text_seq("SORTED", "BY"): 2525 self._match_l_paren() 2526 sorted_by = self._parse_csv(self._parse_ordered) 2527 self._match_r_paren() 2528 else: 2529 sorted_by = None 2530 2531 self._match(TokenType.INTO) 2532 buckets = self._parse_number() 2533 self._match_text_seq("BUCKETS") 2534 2535 return self.expression( 2536 exp.ClusteredByProperty, 2537 expressions=expressions, 2538 sorted_by=sorted_by, 2539 buckets=buckets, 2540 ) 2541 2542 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2543 if not self._match_text_seq("GRANTS"): 2544 self._retreat(self._index - 1) 2545 return None 2546 2547 return self.expression(exp.CopyGrantsProperty) 2548 2549 def _parse_freespace(self) -> exp.FreespaceProperty: 2550 self._match(TokenType.EQ) 2551 return self.expression( 2552 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2553 ) 2554 2555 def _parse_mergeblockratio( 2556 self, no: bool = False, default: bool = False 2557 ) -> exp.MergeBlockRatioProperty: 2558 if self._match(TokenType.EQ): 2559 return self.expression( 2560 exp.MergeBlockRatioProperty, 2561 this=self._parse_number(), 2562 percent=self._match(TokenType.PERCENT), 2563 ) 2564 2565 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2566 2567 def _parse_datablocksize( 2568 self, 2569 default: t.Optional[bool] = None, 2570 minimum: t.Optional[bool] = None, 2571 maximum: t.Optional[bool] = None, 2572 ) -> exp.DataBlocksizeProperty: 2573 self._match(TokenType.EQ) 2574 size = self._parse_number() 2575 2576 units = None 2577 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2578 units = self._prev.text 2579 2580 return self.expression( 2581 exp.DataBlocksizeProperty, 2582 size=size, 2583 units=units, 2584 default=default, 2585 minimum=minimum, 2586 maximum=maximum, 2587 ) 2588 2589 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2590 self._match(TokenType.EQ) 2591 always = self._match_text_seq("ALWAYS") 2592 manual = self._match_text_seq("MANUAL") 2593 never = self._match_text_seq("NEVER") 2594 default = self._match_text_seq("DEFAULT") 2595 2596 autotemp = None 2597 if self._match_text_seq("AUTOTEMP"): 2598 autotemp = self._parse_schema() 2599 2600 return self.expression( 2601 exp.BlockCompressionProperty, 2602 always=always, 2603 manual=manual, 2604 never=never, 2605 default=default, 2606 autotemp=autotemp, 2607 ) 2608 2609 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2610 index = self._index 2611 no = self._match_text_seq("NO") 2612 concurrent = self._match_text_seq("CONCURRENT") 2613 2614 if not self._match_text_seq("ISOLATED", "LOADING"): 2615 self._retreat(index) 2616 return None 2617 2618 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2619 return self.expression( 2620 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2621 ) 2622 2623 def _parse_locking(self) -> exp.LockingProperty: 2624 if self._match(TokenType.TABLE): 2625 kind = "TABLE" 2626 elif 
self._match(TokenType.VIEW): 2627 kind = "VIEW" 2628 elif self._match(TokenType.ROW): 2629 kind = "ROW" 2630 elif self._match_text_seq("DATABASE"): 2631 kind = "DATABASE" 2632 else: 2633 kind = None 2634 2635 if kind in ("DATABASE", "TABLE", "VIEW"): 2636 this = self._parse_table_parts() 2637 else: 2638 this = None 2639 2640 if self._match(TokenType.FOR): 2641 for_or_in = "FOR" 2642 elif self._match(TokenType.IN): 2643 for_or_in = "IN" 2644 else: 2645 for_or_in = None 2646 2647 if self._match_text_seq("ACCESS"): 2648 lock_type = "ACCESS" 2649 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2650 lock_type = "EXCLUSIVE" 2651 elif self._match_text_seq("SHARE"): 2652 lock_type = "SHARE" 2653 elif self._match_text_seq("READ"): 2654 lock_type = "READ" 2655 elif self._match_text_seq("WRITE"): 2656 lock_type = "WRITE" 2657 elif self._match_text_seq("CHECKSUM"): 2658 lock_type = "CHECKSUM" 2659 else: 2660 lock_type = None 2661 2662 override = self._match_text_seq("OVERRIDE") 2663 2664 return self.expression( 2665 exp.LockingProperty, 2666 this=this, 2667 kind=kind, 2668 for_or_in=for_or_in, 2669 lock_type=lock_type, 2670 override=override, 2671 ) 2672 2673 def _parse_partition_by(self) -> t.List[exp.Expression]: 2674 if self._match(TokenType.PARTITION_BY): 2675 return self._parse_csv(self._parse_assignment) 2676 return [] 2677 2678 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2679 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2680 if self._match_text_seq("MINVALUE"): 2681 return exp.var("MINVALUE") 2682 if self._match_text_seq("MAXVALUE"): 2683 return exp.var("MAXVALUE") 2684 return self._parse_bitwise() 2685 2686 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2687 expression = None 2688 from_expressions = None 2689 to_expressions = None 2690 2691 if self._match(TokenType.IN): 2692 this = self._parse_wrapped_csv(self._parse_bitwise) 2693 elif self._match(TokenType.FROM): 2694 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2695 self._match_text_seq("TO") 2696 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2697 elif self._match_text_seq("WITH", "(", "MODULUS"): 2698 this = self._parse_number() 2699 self._match_text_seq(",", "REMAINDER") 2700 expression = self._parse_number() 2701 self._match_r_paren() 2702 else: 2703 self.raise_error("Failed to parse partition bound spec.") 2704 2705 return self.expression( 2706 exp.PartitionBoundSpec, 2707 this=this, 2708 expression=expression, 2709 from_expressions=from_expressions, 2710 to_expressions=to_expressions, 2711 ) 2712 2713 # https://www.postgresql.org/docs/current/sql-createtable.html 2714 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2715 if not self._match_text_seq("OF"): 2716 self._retreat(self._index - 1) 2717 return None 2718 2719 this = self._parse_table(schema=True) 2720 2721 if self._match(TokenType.DEFAULT): 2722 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2723 elif self._match_text_seq("FOR", "VALUES"): 2724 expression = self._parse_partition_bound_spec() 2725 else: 2726 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2727 2728 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2729 2730 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2731 self._match(TokenType.EQ) 2732 return self.expression( 2733 exp.PartitionedByProperty, 2734 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2735 ) 2736 2737 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2738 if self._match_text_seq("AND", "STATISTICS"): 2739 statistics = True 2740 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2741 statistics = False 2742 else: 2743 statistics = None 2744 2745 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2746 2747 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2748 if self._match_text_seq("SQL"): 2749 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2750 return None 2751 2752 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2753 if self._match_text_seq("SQL", "DATA"): 2754 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2755 return None 2756 2757 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2758 if self._match_text_seq("PRIMARY", "INDEX"): 2759 return exp.NoPrimaryIndexProperty() 2760 if self._match_text_seq("SQL"): 2761 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2762 return None 2763 2764 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2765 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2766 return exp.OnCommitProperty() 2767 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2768 return exp.OnCommitProperty(delete=True) 2769 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2770 2771 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2772 if self._match_text_seq("SQL", "DATA"): 2773 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2774 return None 2775 2776 def _parse_distkey(self) -> exp.DistKeyProperty: 2777 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2778 2779 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2780 table = self._parse_table(schema=True) 2781 2782 options = [] 2783 while self._match_texts(("INCLUDING", "EXCLUDING")): 2784 this = self._prev.text.upper() 2785 2786 id_var = self._parse_id_var() 2787 if not id_var: 2788 return None 2789 2790 options.append( 2791 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2792 ) 2793 2794 return self.expression(exp.LikeProperty, this=table, expressions=options) 2795 2796 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2797 return self.expression( 2798 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2799 ) 2800 2801 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2802 self._match(TokenType.EQ) 2803 return self.expression( 2804 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2805 ) 2806 2807 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2808 self._match_text_seq("WITH", "CONNECTION") 2809 return self.expression( 2810 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2811 ) 2812 2813 def _parse_returns(self) -> exp.ReturnsProperty: 2814 value: t.Optional[exp.Expression] 2815 null = None 2816 is_table = self._match(TokenType.TABLE) 2817 2818 if is_table: 2819 if self._match(TokenType.LT): 2820 value = self.expression( 2821 exp.Schema, 2822 this="TABLE", 2823 expressions=self._parse_csv(self._parse_struct_types), 2824 ) 2825 if not self._match(TokenType.GT): 2826 self.raise_error("Expecting >") 2827 else: 2828 value = self._parse_schema(exp.var("TABLE")) 2829 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
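            # RETURNS NULL ON NULL INPUT (SQL standard / T-SQL): the function is not
            # invoked and simply returns NULL when any of its arguments is NULL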
2830 null = True 2831 value = None 2832 else: 2833 value = self._parse_types() 2834 2835 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2836 2837 def _parse_describe(self) -> exp.Describe: 2838 kind = self._match_set(self.CREATABLES) and self._prev.text 2839 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2840 if self._match(TokenType.DOT): 2841 style = None 2842 self._retreat(self._index - 2) 2843 2844 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2845 2846 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2847 this = self._parse_statement() 2848 else: 2849 this = self._parse_table(schema=True) 2850 2851 properties = self._parse_properties() 2852 expressions = properties.expressions if properties else None 2853 partition = self._parse_partition() 2854 return self.expression( 2855 exp.Describe, 2856 this=this, 2857 style=style, 2858 kind=kind, 2859 expressions=expressions, 2860 partition=partition, 2861 format=format, 2862 ) 2863 2864 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2865 kind = self._prev.text.upper() 2866 expressions = [] 2867 2868 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2869 if self._match(TokenType.WHEN): 2870 expression = self._parse_disjunction() 2871 self._match(TokenType.THEN) 2872 else: 2873 expression = None 2874 2875 else_ = self._match(TokenType.ELSE) 2876 2877 if not self._match(TokenType.INTO): 2878 return None 2879 2880 return self.expression( 2881 exp.ConditionalInsert, 2882 this=self.expression( 2883 exp.Insert, 2884 this=self._parse_table(schema=True), 2885 expression=self._parse_derived_table_values(), 2886 ), 2887 expression=expression, 2888 else_=else_, 2889 ) 2890 2891 expression = parse_conditional_insert() 2892 while expression is not None: 2893 expressions.append(expression) 2894 expression = parse_conditional_insert() 2895 2896 return self.expression( 2897 exp.MultitableInserts, 2898 kind=kind, 2899 comments=comments, 2900 expressions=expressions, 2901 source=self._parse_table(), 2902 ) 2903 2904 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2905 comments = [] 2906 hint = self._parse_hint() 2907 overwrite = self._match(TokenType.OVERWRITE) 2908 ignore = self._match(TokenType.IGNORE) 2909 local = self._match_text_seq("LOCAL") 2910 alternative = None 2911 is_function = None 2912 2913 if self._match_text_seq("DIRECTORY"): 2914 this: t.Optional[exp.Expression] = self.expression( 2915 exp.Directory, 2916 this=self._parse_var_or_string(), 2917 local=local, 2918 row_format=self._parse_row_format(match_row=True), 2919 ) 2920 else: 2921 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2922 comments += ensure_list(self._prev_comments) 2923 return self._parse_multitable_inserts(comments) 2924 2925 if self._match(TokenType.OR): 2926 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2927 2928 self._match(TokenType.INTO) 2929 comments += ensure_list(self._prev_comments) 2930 self._match(TokenType.TABLE) 2931 is_function = self._match(TokenType.FUNCTION) 2932 2933 this = ( 2934 self._parse_table(schema=True, parse_partition=True) 2935 if not is_function 2936 else self._parse_function() 2937 ) 2938 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2939 this.set("alias", self._parse_table_alias()) 2940 2941 returning = self._parse_returning() 2942 2943 return self.expression( 2944 
exp.Insert, 2945 comments=comments, 2946 hint=hint, 2947 is_function=is_function, 2948 this=this, 2949 stored=self._match_text_seq("STORED") and self._parse_stored(), 2950 by_name=self._match_text_seq("BY", "NAME"), 2951 exists=self._parse_exists(), 2952 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2953 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2954 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2955 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2956 conflict=self._parse_on_conflict(), 2957 returning=returning or self._parse_returning(), 2958 overwrite=overwrite, 2959 alternative=alternative, 2960 ignore=ignore, 2961 source=self._match(TokenType.TABLE) and self._parse_table(), 2962 ) 2963 2964 def _parse_kill(self) -> exp.Kill: 2965 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2966 2967 return self.expression( 2968 exp.Kill, 2969 this=self._parse_primary(), 2970 kind=kind, 2971 ) 2972 2973 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2974 conflict = self._match_text_seq("ON", "CONFLICT") 2975 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2976 2977 if not conflict and not duplicate: 2978 return None 2979 2980 conflict_keys = None 2981 constraint = None 2982 2983 if conflict: 2984 if self._match_text_seq("ON", "CONSTRAINT"): 2985 constraint = self._parse_id_var() 2986 elif self._match(TokenType.L_PAREN): 2987 conflict_keys = self._parse_csv(self._parse_id_var) 2988 self._match_r_paren() 2989 2990 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2991 if self._prev.token_type == TokenType.UPDATE: 2992 self._match(TokenType.SET) 2993 expressions = self._parse_csv(self._parse_equality) 2994 else: 2995 expressions = None 2996 2997 return self.expression( 2998 exp.OnConflict, 2999 duplicate=duplicate, 3000 expressions=expressions, 3001 action=action, 3002 conflict_keys=conflict_keys, 3003 constraint=constraint, 3004 where=self._parse_where(), 3005 ) 3006 3007 def _parse_returning(self) -> t.Optional[exp.Returning]: 3008 if not self._match(TokenType.RETURNING): 3009 return None 3010 return self.expression( 3011 exp.Returning, 3012 expressions=self._parse_csv(self._parse_expression), 3013 into=self._match(TokenType.INTO) and self._parse_table_part(), 3014 ) 3015 3016 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3017 if not self._match(TokenType.FORMAT): 3018 return None 3019 return self._parse_row_format() 3020 3021 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3022 index = self._index 3023 with_ = with_ or self._match_text_seq("WITH") 3024 3025 if not self._match(TokenType.SERDE_PROPERTIES): 3026 self._retreat(index) 3027 return None 3028 return self.expression( 3029 exp.SerdeProperties, 3030 **{ # type: ignore 3031 "expressions": self._parse_wrapped_properties(), 3032 "with": with_, 3033 }, 3034 ) 3035 3036 def _parse_row_format( 3037 self, match_row: bool = False 3038 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3039 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3040 return None 3041 3042 if self._match_text_seq("SERDE"): 3043 this = self._parse_string() 3044 3045 serde_properties = self._parse_serde_properties() 3046 3047 return self.expression( 3048 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3049 ) 3050 3051 self._match_text_seq("DELIMITED") 3052 3053 kwargs = {} 3054 3055 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3056 kwargs["fields"] = self._parse_string() 3057 if self._match_text_seq("ESCAPED", "BY"): 3058 kwargs["escaped"] = self._parse_string() 3059 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3060 kwargs["collection_items"] = self._parse_string() 3061 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3062 kwargs["map_keys"] = self._parse_string() 3063 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3064 kwargs["lines"] = self._parse_string() 3065 if self._match_text_seq("NULL", "DEFINED", "AS"): 3066 kwargs["null"] = self._parse_string() 3067 3068 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3069 3070 def _parse_load(self) -> exp.LoadData | exp.Command: 3071 if self._match_text_seq("DATA"): 3072 local = self._match_text_seq("LOCAL") 3073 self._match_text_seq("INPATH") 3074 inpath = self._parse_string() 3075 overwrite = self._match(TokenType.OVERWRITE) 3076 self._match_pair(TokenType.INTO, TokenType.TABLE) 3077 3078 return self.expression( 3079 exp.LoadData, 3080 this=self._parse_table(schema=True), 3081 local=local, 3082 overwrite=overwrite, 3083 inpath=inpath, 3084 partition=self._parse_partition(), 3085 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3086 serde=self._match_text_seq("SERDE") and self._parse_string(), 3087 ) 3088 return self._parse_as_command(self._prev) 3089 3090 def _parse_delete(self) -> exp.Delete: 3091 # This handles MySQL's "Multiple-Table Syntax" 3092 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3093 tables = None 3094 if not self._match(TokenType.FROM, advance=False): 3095 tables = self._parse_csv(self._parse_table) or None 3096 3097 returning = self._parse_returning() 3098 3099 return self.expression( 3100 exp.Delete, 3101 tables=tables, 3102 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3103 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3104 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3105 where=self._parse_where(), 3106 returning=returning or self._parse_returning(), 3107 limit=self._parse_limit(), 3108 ) 3109 3110 def _parse_update(self) -> exp.Update: 3111 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3112 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3113 returning = self._parse_returning() 3114 return self.expression( 3115 exp.Update, 3116 **{ # type: ignore 3117 "this": this, 3118 "expressions": expressions, 3119 "from": self._parse_from(joins=True), 3120 "where": self._parse_where(), 3121 "returning": returning or self._parse_returning(), 3122 "order": self._parse_order(), 3123 "limit": self._parse_limit(), 3124 }, 3125 ) 3126 3127 def _parse_use(self) -> exp.Use: 3128 return self.expression( 3129 exp.Use, 3130 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3131 this=self._parse_table(schema=False), 3132 ) 3133 3134 def _parse_uncache(self) -> exp.Uncache: 3135 if not self._match(TokenType.TABLE): 3136 self.raise_error("Expecting TABLE after UNCACHE") 3137 3138 return self.expression( 3139 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3140 ) 3141 3142 def _parse_cache(self) -> exp.Cache: 3143 lazy = self._match_text_seq("LAZY") 3144 self._match(TokenType.TABLE) 3145 table = 
self._parse_table(schema=True) 3146 3147 options = [] 3148 if self._match_text_seq("OPTIONS"): 3149 self._match_l_paren() 3150 k = self._parse_string() 3151 self._match(TokenType.EQ) 3152 v = self._parse_string() 3153 options = [k, v] 3154 self._match_r_paren() 3155 3156 self._match(TokenType.ALIAS) 3157 return self.expression( 3158 exp.Cache, 3159 this=table, 3160 lazy=lazy, 3161 options=options, 3162 expression=self._parse_select(nested=True), 3163 ) 3164 3165 def _parse_partition(self) -> t.Optional[exp.Partition]: 3166 if not self._match_texts(self.PARTITION_KEYWORDS): 3167 return None 3168 3169 return self.expression( 3170 exp.Partition, 3171 subpartition=self._prev.text.upper() == "SUBPARTITION", 3172 expressions=self._parse_wrapped_csv(self._parse_assignment), 3173 ) 3174 3175 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3176 def _parse_value_expression() -> t.Optional[exp.Expression]: 3177 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3178 return exp.var(self._prev.text.upper()) 3179 return self._parse_expression() 3180 3181 if self._match(TokenType.L_PAREN): 3182 expressions = self._parse_csv(_parse_value_expression) 3183 self._match_r_paren() 3184 return self.expression(exp.Tuple, expressions=expressions) 3185 3186 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3187 expression = self._parse_expression() 3188 if expression: 3189 return self.expression(exp.Tuple, expressions=[expression]) 3190 return None 3191 3192 def _parse_projections(self) -> t.List[exp.Expression]: 3193 return self._parse_expressions() 3194 3195 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3196 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3197 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3198 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3199 ) 3200 elif self._match(TokenType.FROM): 3201 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3202 # Support parentheses for duckdb FROM-first syntax 3203 select = self._parse_select() 3204 if select: 3205 select.set("from", from_) 3206 this = select 3207 else: 3208 this = exp.select("*").from_(t.cast(exp.From, from_)) 3209 else: 3210 this = ( 3211 self._parse_table(consume_pipe=True) 3212 if table 3213 else self._parse_select(nested=True, parse_set_operation=False) 3214 ) 3215 3216 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3217 # in case a modifier (e.g. 
join) is following 3218 if table and isinstance(this, exp.Values) and this.alias: 3219 alias = this.args["alias"].pop() 3220 this = exp.Table(this=this, alias=alias) 3221 3222 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3223 3224 return this 3225 3226 def _parse_select( 3227 self, 3228 nested: bool = False, 3229 table: bool = False, 3230 parse_subquery_alias: bool = True, 3231 parse_set_operation: bool = True, 3232 consume_pipe: bool = True, 3233 ) -> t.Optional[exp.Expression]: 3234 query = self._parse_select_query( 3235 nested=nested, 3236 table=table, 3237 parse_subquery_alias=parse_subquery_alias, 3238 parse_set_operation=parse_set_operation, 3239 ) 3240 3241 if ( 3242 consume_pipe 3243 and self._match(TokenType.PIPE_GT, advance=False) 3244 and isinstance(query, exp.Query) 3245 ): 3246 query = self._parse_pipe_syntax_query(query) 3247 query = query.subquery(copy=False) if query and table else query 3248 3249 return query 3250 3251 def _parse_select_query( 3252 self, 3253 nested: bool = False, 3254 table: bool = False, 3255 parse_subquery_alias: bool = True, 3256 parse_set_operation: bool = True, 3257 ) -> t.Optional[exp.Expression]: 3258 cte = self._parse_with() 3259 3260 if cte: 3261 this = self._parse_statement() 3262 3263 if not this: 3264 self.raise_error("Failed to parse any statement following CTE") 3265 return cte 3266 3267 if "with" in this.arg_types: 3268 this.set("with", cte) 3269 else: 3270 self.raise_error(f"{this.key} does not support CTE") 3271 this = cte 3272 3273 return this 3274 3275 # duckdb supports leading with FROM x 3276 from_ = ( 3277 self._parse_from(consume_pipe=True) 3278 if self._match(TokenType.FROM, advance=False) 3279 else None 3280 ) 3281 3282 if self._match(TokenType.SELECT): 3283 comments = self._prev_comments 3284 3285 hint = self._parse_hint() 3286 3287 if self._next and not self._next.token_type == TokenType.DOT: 3288 all_ = self._match(TokenType.ALL) 3289 distinct = self._match_set(self.DISTINCT_TOKENS) 3290 else: 3291 all_, distinct = None, None 3292 3293 kind = ( 3294 self._match(TokenType.ALIAS) 3295 and self._match_texts(("STRUCT", "VALUE")) 3296 and self._prev.text.upper() 3297 ) 3298 3299 if distinct: 3300 distinct = self.expression( 3301 exp.Distinct, 3302 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3303 ) 3304 3305 if all_ and distinct: 3306 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3307 3308 operation_modifiers = [] 3309 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3310 operation_modifiers.append(exp.var(self._prev.text.upper())) 3311 3312 limit = self._parse_limit(top=True) 3313 projections = self._parse_projections() 3314 3315 this = self.expression( 3316 exp.Select, 3317 kind=kind, 3318 hint=hint, 3319 distinct=distinct, 3320 expressions=projections, 3321 limit=limit, 3322 operation_modifiers=operation_modifiers or None, 3323 ) 3324 this.comments = comments 3325 3326 into = self._parse_into() 3327 if into: 3328 this.set("into", into) 3329 3330 if not from_: 3331 from_ = self._parse_from() 3332 3333 if from_: 3334 this.set("from", from_) 3335 3336 this = self._parse_query_modifiers(this) 3337 elif (table or nested) and self._match(TokenType.L_PAREN): 3338 this = self._parse_wrapped_select(table=table) 3339 3340 # We return early here so that the UNION isn't attached to the subquery by the 3341 # following call to _parse_set_operations, but instead becomes the parent node 3342 self._match_r_paren() 3343 return self._parse_subquery(this, 
parse_alias=parse_subquery_alias) 3344 elif self._match(TokenType.VALUES, advance=False): 3345 this = self._parse_derived_table_values() 3346 elif from_: 3347 this = exp.select("*").from_(from_.this, copy=False) 3348 elif self._match(TokenType.SUMMARIZE): 3349 table = self._match(TokenType.TABLE) 3350 this = self._parse_select() or self._parse_string() or self._parse_table() 3351 return self.expression(exp.Summarize, this=this, table=table) 3352 elif self._match(TokenType.DESCRIBE): 3353 this = self._parse_describe() 3354 elif self._match_text_seq("STREAM"): 3355 this = self._parse_function() 3356 if this: 3357 this = self.expression(exp.Stream, this=this) 3358 else: 3359 self._retreat(self._index - 1) 3360 else: 3361 this = None 3362 3363 return self._parse_set_operations(this) if parse_set_operation else this 3364 3365 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3366 self._match_text_seq("SEARCH") 3367 3368 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3369 3370 if not kind: 3371 return None 3372 3373 self._match_text_seq("FIRST", "BY") 3374 3375 return self.expression( 3376 exp.RecursiveWithSearch, 3377 kind=kind, 3378 this=self._parse_id_var(), 3379 expression=self._match_text_seq("SET") and self._parse_id_var(), 3380 using=self._match_text_seq("USING") and self._parse_id_var(), 3381 ) 3382 3383 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3384 if not skip_with_token and not self._match(TokenType.WITH): 3385 return None 3386 3387 comments = self._prev_comments 3388 recursive = self._match(TokenType.RECURSIVE) 3389 3390 last_comments = None 3391 expressions = [] 3392 while True: 3393 cte = self._parse_cte() 3394 if isinstance(cte, exp.CTE): 3395 expressions.append(cte) 3396 if last_comments: 3397 cte.add_comments(last_comments) 3398 3399 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3400 break 3401 else: 3402 self._match(TokenType.WITH) 3403 3404 last_comments = self._prev_comments 3405 3406 return self.expression( 3407 exp.With, 3408 comments=comments, 3409 expressions=expressions, 3410 recursive=recursive, 3411 search=self._parse_recursive_with_search(), 3412 ) 3413 3414 def _parse_cte(self) -> t.Optional[exp.CTE]: 3415 index = self._index 3416 3417 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3418 if not alias or not alias.this: 3419 self.raise_error("Expected CTE to have alias") 3420 3421 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3422 self._retreat(index) 3423 return None 3424 3425 comments = self._prev_comments 3426 3427 if self._match_text_seq("NOT", "MATERIALIZED"): 3428 materialized = False 3429 elif self._match_text_seq("MATERIALIZED"): 3430 materialized = True 3431 else: 3432 materialized = None 3433 3434 cte = self.expression( 3435 exp.CTE, 3436 this=self._parse_wrapped(self._parse_statement), 3437 alias=alias, 3438 materialized=materialized, 3439 comments=comments, 3440 ) 3441 3442 values = cte.this 3443 if isinstance(values, exp.Values): 3444 if values.alias: 3445 cte.set("this", exp.select("*").from_(values)) 3446 else: 3447 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3448 3449 return cte 3450 3451 def _parse_table_alias( 3452 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3453 ) -> t.Optional[exp.TableAlias]: 3454 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3455 # so this section tries to parse the clause 
version and if it fails, it treats the token 3456 # as an identifier (alias) 3457 if self._can_parse_limit_or_offset(): 3458 return None 3459 3460 any_token = self._match(TokenType.ALIAS) 3461 alias = ( 3462 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3463 or self._parse_string_as_identifier() 3464 ) 3465 3466 index = self._index 3467 if self._match(TokenType.L_PAREN): 3468 columns = self._parse_csv(self._parse_function_parameter) 3469 self._match_r_paren() if columns else self._retreat(index) 3470 else: 3471 columns = None 3472 3473 if not alias and not columns: 3474 return None 3475 3476 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3477 3478 # We bubble up comments from the Identifier to the TableAlias 3479 if isinstance(alias, exp.Identifier): 3480 table_alias.add_comments(alias.pop_comments()) 3481 3482 return table_alias 3483 3484 def _parse_subquery( 3485 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3486 ) -> t.Optional[exp.Subquery]: 3487 if not this: 3488 return None 3489 3490 return self.expression( 3491 exp.Subquery, 3492 this=this, 3493 pivots=self._parse_pivots(), 3494 alias=self._parse_table_alias() if parse_alias else None, 3495 sample=self._parse_table_sample(), 3496 ) 3497 3498 def _implicit_unnests_to_explicit(self, this: E) -> E: 3499 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3500 3501 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3502 for i, join in enumerate(this.args.get("joins") or []): 3503 table = join.this 3504 normalized_table = table.copy() 3505 normalized_table.meta["maybe_column"] = True 3506 normalized_table = _norm(normalized_table, dialect=self.dialect) 3507 3508 if isinstance(table, exp.Table) and not join.args.get("on"): 3509 if normalized_table.parts[0].name in refs: 3510 table_as_column = table.to_column() 3511 unnest = exp.Unnest(expressions=[table_as_column]) 3512 3513 # Table.to_column creates a parent Alias node that we want to convert to 3514 # a TableAlias and attach to the Unnest, so it matches the parser's output 3515 if isinstance(table.args.get("alias"), exp.TableAlias): 3516 table_as_column.replace(table_as_column.this) 3517 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3518 3519 table.replace(unnest) 3520 3521 refs.add(normalized_table.alias_or_name) 3522 3523 return this 3524 3525 def _parse_query_modifiers( 3526 self, this: t.Optional[exp.Expression] 3527 ) -> t.Optional[exp.Expression]: 3528 if isinstance(this, self.MODIFIABLES): 3529 for join in self._parse_joins(): 3530 this.append("joins", join) 3531 for lateral in iter(self._parse_lateral, None): 3532 this.append("laterals", lateral) 3533 3534 while True: 3535 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3536 modifier_token = self._curr 3537 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3538 key, expression = parser(self) 3539 3540 if expression: 3541 if this.args.get(key): 3542 self.raise_error( 3543 f"Found multiple '{modifier_token.text.upper()}' clauses", 3544 token=modifier_token, 3545 ) 3546 3547 this.set(key, expression) 3548 if key == "limit": 3549 offset = expression.args.pop("offset", None) 3550 3551 if offset: 3552 offset = exp.Offset(expression=offset) 3553 this.set("offset", offset) 3554 3555 limit_by_expressions = expression.expressions 3556 expression.set("expressions", None) 3557 offset.set("expressions", limit_by_expressions) 3558 continue 
3559 break 3560 3561 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3562 this = self._implicit_unnests_to_explicit(this) 3563 3564 return this 3565 3566 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3567 start = self._curr 3568 while self._curr: 3569 self._advance() 3570 3571 end = self._tokens[self._index - 1] 3572 return exp.Hint(expressions=[self._find_sql(start, end)]) 3573 3574 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3575 return self._parse_function_call() 3576 3577 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3578 start_index = self._index 3579 should_fallback_to_string = False 3580 3581 hints = [] 3582 try: 3583 for hint in iter( 3584 lambda: self._parse_csv( 3585 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3586 ), 3587 [], 3588 ): 3589 hints.extend(hint) 3590 except ParseError: 3591 should_fallback_to_string = True 3592 3593 if should_fallback_to_string or self._curr: 3594 self._retreat(start_index) 3595 return self._parse_hint_fallback_to_string() 3596 3597 return self.expression(exp.Hint, expressions=hints) 3598 3599 def _parse_hint(self) -> t.Optional[exp.Hint]: 3600 if self._match(TokenType.HINT) and self._prev_comments: 3601 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3602 3603 return None 3604 3605 def _parse_into(self) -> t.Optional[exp.Into]: 3606 if not self._match(TokenType.INTO): 3607 return None 3608 3609 temp = self._match(TokenType.TEMPORARY) 3610 unlogged = self._match_text_seq("UNLOGGED") 3611 self._match(TokenType.TABLE) 3612 3613 return self.expression( 3614 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3615 ) 3616 3617 def _parse_from( 3618 self, 3619 joins: bool = False, 3620 skip_from_token: bool = False, 3621 consume_pipe: bool = False, 3622 ) -> t.Optional[exp.From]: 3623 if not skip_from_token and not self._match(TokenType.FROM): 3624 return None 3625 3626 return self.expression( 3627 exp.From, 3628 comments=self._prev_comments, 3629 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3630 ) 3631 3632 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3633 return self.expression( 3634 exp.MatchRecognizeMeasure, 3635 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3636 this=self._parse_expression(), 3637 ) 3638 3639 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3640 if not self._match(TokenType.MATCH_RECOGNIZE): 3641 return None 3642 3643 self._match_l_paren() 3644 3645 partition = self._parse_partition_by() 3646 order = self._parse_order() 3647 3648 measures = ( 3649 self._parse_csv(self._parse_match_recognize_measure) 3650 if self._match_text_seq("MEASURES") 3651 else None 3652 ) 3653 3654 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3655 rows = exp.var("ONE ROW PER MATCH") 3656 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3657 text = "ALL ROWS PER MATCH" 3658 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3659 text += " SHOW EMPTY MATCHES" 3660 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3661 text += " OMIT EMPTY MATCHES" 3662 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3663 text += " WITH UNMATCHED ROWS" 3664 rows = exp.var(text) 3665 else: 3666 rows = None 3667 3668 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3669 text = "AFTER MATCH SKIP" 3670 if self._match_text_seq("PAST", "LAST", "ROW"): 3671 text += " PAST LAST ROW" 3672 elif 
self._match_text_seq("TO", "NEXT", "ROW"): 3673 text += " TO NEXT ROW" 3674 elif self._match_text_seq("TO", "FIRST"): 3675 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3676 elif self._match_text_seq("TO", "LAST"): 3677 text += f" TO LAST {self._advance_any().text}" # type: ignore 3678 after = exp.var(text) 3679 else: 3680 after = None 3681 3682 if self._match_text_seq("PATTERN"): 3683 self._match_l_paren() 3684 3685 if not self._curr: 3686 self.raise_error("Expecting )", self._curr) 3687 3688 paren = 1 3689 start = self._curr 3690 3691 while self._curr and paren > 0: 3692 if self._curr.token_type == TokenType.L_PAREN: 3693 paren += 1 3694 if self._curr.token_type == TokenType.R_PAREN: 3695 paren -= 1 3696 3697 end = self._prev 3698 self._advance() 3699 3700 if paren > 0: 3701 self.raise_error("Expecting )", self._curr) 3702 3703 pattern = exp.var(self._find_sql(start, end)) 3704 else: 3705 pattern = None 3706 3707 define = ( 3708 self._parse_csv(self._parse_name_as_expression) 3709 if self._match_text_seq("DEFINE") 3710 else None 3711 ) 3712 3713 self._match_r_paren() 3714 3715 return self.expression( 3716 exp.MatchRecognize, 3717 partition_by=partition, 3718 order=order, 3719 measures=measures, 3720 rows=rows, 3721 after=after, 3722 pattern=pattern, 3723 define=define, 3724 alias=self._parse_table_alias(), 3725 ) 3726 3727 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3728 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3729 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3730 cross_apply = False 3731 3732 if cross_apply is not None: 3733 this = self._parse_select(table=True) 3734 view = None 3735 outer = None 3736 elif self._match(TokenType.LATERAL): 3737 this = self._parse_select(table=True) 3738 view = self._match(TokenType.VIEW) 3739 outer = self._match(TokenType.OUTER) 3740 else: 3741 return None 3742 3743 if not this: 3744 this = ( 3745 self._parse_unnest() 3746 or self._parse_function() 3747 or self._parse_id_var(any_token=False) 3748 ) 3749 3750 while self._match(TokenType.DOT): 3751 this = exp.Dot( 3752 this=this, 3753 expression=self._parse_function() or self._parse_id_var(any_token=False), 3754 ) 3755 3756 ordinality: t.Optional[bool] = None 3757 3758 if view: 3759 table = self._parse_id_var(any_token=False) 3760 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3761 table_alias: t.Optional[exp.TableAlias] = self.expression( 3762 exp.TableAlias, this=table, columns=columns 3763 ) 3764 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3765 # We move the alias from the lateral's child node to the lateral itself 3766 table_alias = this.args["alias"].pop() 3767 else: 3768 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3769 table_alias = self._parse_table_alias() 3770 3771 return self.expression( 3772 exp.Lateral, 3773 this=this, 3774 view=view, 3775 outer=outer, 3776 alias=table_alias, 3777 cross_apply=cross_apply, 3778 ordinality=ordinality, 3779 ) 3780 3781 def _parse_join_parts( 3782 self, 3783 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3784 return ( 3785 self._match_set(self.JOIN_METHODS) and self._prev, 3786 self._match_set(self.JOIN_SIDES) and self._prev, 3787 self._match_set(self.JOIN_KINDS) and self._prev, 3788 ) 3789 3790 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3791 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3792 this = self._parse_column() 3793 if 
isinstance(this, exp.Column): 3794 return this.this 3795 return this 3796 3797 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3798 3799 def _parse_join( 3800 self, skip_join_token: bool = False, parse_bracket: bool = False 3801 ) -> t.Optional[exp.Join]: 3802 if self._match(TokenType.COMMA): 3803 table = self._try_parse(self._parse_table) 3804 cross_join = self.expression(exp.Join, this=table) if table else None 3805 3806 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3807 cross_join.set("kind", "CROSS") 3808 3809 return cross_join 3810 3811 index = self._index 3812 method, side, kind = self._parse_join_parts() 3813 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3814 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3815 join_comments = self._prev_comments 3816 3817 if not skip_join_token and not join: 3818 self._retreat(index) 3819 kind = None 3820 method = None 3821 side = None 3822 3823 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3824 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3825 3826 if not skip_join_token and not join and not outer_apply and not cross_apply: 3827 return None 3828 3829 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3830 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3831 kwargs["expressions"] = self._parse_csv( 3832 lambda: self._parse_table(parse_bracket=parse_bracket) 3833 ) 3834 3835 if method: 3836 kwargs["method"] = method.text 3837 if side: 3838 kwargs["side"] = side.text 3839 if kind: 3840 kwargs["kind"] = kind.text 3841 if hint: 3842 kwargs["hint"] = hint 3843 3844 if self._match(TokenType.MATCH_CONDITION): 3845 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3846 3847 if self._match(TokenType.ON): 3848 kwargs["on"] = self._parse_assignment() 3849 elif self._match(TokenType.USING): 3850 kwargs["using"] = self._parse_using_identifiers() 3851 elif ( 3852 not method 3853 and not (outer_apply or cross_apply) 3854 and not isinstance(kwargs["this"], exp.Unnest) 3855 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3856 ): 3857 index = self._index 3858 joins: t.Optional[list] = list(self._parse_joins()) 3859 3860 if joins and self._match(TokenType.ON): 3861 kwargs["on"] = self._parse_assignment() 3862 elif joins and self._match(TokenType.USING): 3863 kwargs["using"] = self._parse_using_identifiers() 3864 else: 3865 joins = None 3866 self._retreat(index) 3867 3868 kwargs["this"].set("joins", joins if joins else None) 3869 3870 kwargs["pivots"] = self._parse_pivots() 3871 3872 comments = [c for token in (method, side, kind) if token for c in token.comments] 3873 comments = (join_comments or []) + comments 3874 return self.expression(exp.Join, comments=comments, **kwargs) 3875 3876 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3877 this = self._parse_assignment() 3878 3879 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3880 return this 3881 3882 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3883 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3884 3885 return this 3886 3887 def _parse_index_params(self) -> exp.IndexParameters: 3888 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3889 3890 if self._match(TokenType.L_PAREN, advance=False): 3891 columns = 
self._parse_wrapped_csv(self._parse_with_operator) 3892 else: 3893 columns = None 3894 3895 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3896 partition_by = self._parse_partition_by() 3897 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3898 tablespace = ( 3899 self._parse_var(any_token=True) 3900 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3901 else None 3902 ) 3903 where = self._parse_where() 3904 3905 on = self._parse_field() if self._match(TokenType.ON) else None 3906 3907 return self.expression( 3908 exp.IndexParameters, 3909 using=using, 3910 columns=columns, 3911 include=include, 3912 partition_by=partition_by, 3913 where=where, 3914 with_storage=with_storage, 3915 tablespace=tablespace, 3916 on=on, 3917 ) 3918 3919 def _parse_index( 3920 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3921 ) -> t.Optional[exp.Index]: 3922 if index or anonymous: 3923 unique = None 3924 primary = None 3925 amp = None 3926 3927 self._match(TokenType.ON) 3928 self._match(TokenType.TABLE) # hive 3929 table = self._parse_table_parts(schema=True) 3930 else: 3931 unique = self._match(TokenType.UNIQUE) 3932 primary = self._match_text_seq("PRIMARY") 3933 amp = self._match_text_seq("AMP") 3934 3935 if not self._match(TokenType.INDEX): 3936 return None 3937 3938 index = self._parse_id_var() 3939 table = None 3940 3941 params = self._parse_index_params() 3942 3943 return self.expression( 3944 exp.Index, 3945 this=index, 3946 table=table, 3947 unique=unique, 3948 primary=primary, 3949 amp=amp, 3950 params=params, 3951 ) 3952 3953 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3954 hints: t.List[exp.Expression] = [] 3955 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3956 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3957 hints.append( 3958 self.expression( 3959 exp.WithTableHint, 3960 expressions=self._parse_csv( 3961 lambda: self._parse_function() or self._parse_var(any_token=True) 3962 ), 3963 ) 3964 ) 3965 self._match_r_paren() 3966 else: 3967 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3968 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3969 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3970 3971 self._match_set((TokenType.INDEX, TokenType.KEY)) 3972 if self._match(TokenType.FOR): 3973 hint.set("target", self._advance_any() and self._prev.text.upper()) 3974 3975 hint.set("expressions", self._parse_wrapped_id_vars()) 3976 hints.append(hint) 3977 3978 return hints or None 3979 3980 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3981 return ( 3982 (not schema and self._parse_function(optional_parens=False)) 3983 or self._parse_id_var(any_token=False) 3984 or self._parse_string_as_identifier() 3985 or self._parse_placeholder() 3986 ) 3987 3988 def _parse_table_parts( 3989 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3990 ) -> exp.Table: 3991 catalog = None 3992 db = None 3993 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3994 3995 while self._match(TokenType.DOT): 3996 if catalog: 3997 # This allows nesting the table in arbitrarily many dot expressions if needed 3998 table = self.expression( 3999 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4000 ) 4001 else: 4002 catalog = db 4003 db = table 4004 # "" used for tsql FROM a..b case 4005 table = 
self._parse_table_part(schema=schema) or "" 4006 4007 if ( 4008 wildcard 4009 and self._is_connected() 4010 and (isinstance(table, exp.Identifier) or not table) 4011 and self._match(TokenType.STAR) 4012 ): 4013 if isinstance(table, exp.Identifier): 4014 table.args["this"] += "*" 4015 else: 4016 table = exp.Identifier(this="*") 4017 4018 # We bubble up comments from the Identifier to the Table 4019 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4020 4021 if is_db_reference: 4022 catalog = db 4023 db = table 4024 table = None 4025 4026 if not table and not is_db_reference: 4027 self.raise_error(f"Expected table name but got {self._curr}") 4028 if not db and is_db_reference: 4029 self.raise_error(f"Expected database name but got {self._curr}") 4030 4031 table = self.expression( 4032 exp.Table, 4033 comments=comments, 4034 this=table, 4035 db=db, 4036 catalog=catalog, 4037 ) 4038 4039 changes = self._parse_changes() 4040 if changes: 4041 table.set("changes", changes) 4042 4043 at_before = self._parse_historical_data() 4044 if at_before: 4045 table.set("when", at_before) 4046 4047 pivots = self._parse_pivots() 4048 if pivots: 4049 table.set("pivots", pivots) 4050 4051 return table 4052 4053 def _parse_table( 4054 self, 4055 schema: bool = False, 4056 joins: bool = False, 4057 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4058 parse_bracket: bool = False, 4059 is_db_reference: bool = False, 4060 parse_partition: bool = False, 4061 consume_pipe: bool = False, 4062 ) -> t.Optional[exp.Expression]: 4063 lateral = self._parse_lateral() 4064 if lateral: 4065 return lateral 4066 4067 unnest = self._parse_unnest() 4068 if unnest: 4069 return unnest 4070 4071 values = self._parse_derived_table_values() 4072 if values: 4073 return values 4074 4075 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4076 if subquery: 4077 if not subquery.args.get("pivots"): 4078 subquery.set("pivots", self._parse_pivots()) 4079 return subquery 4080 4081 bracket = parse_bracket and self._parse_bracket(None) 4082 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4083 4084 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4085 self._parse_table 4086 ) 4087 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4088 4089 only = self._match(TokenType.ONLY) 4090 4091 this = t.cast( 4092 exp.Expression, 4093 bracket 4094 or rows_from 4095 or self._parse_bracket( 4096 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4097 ), 4098 ) 4099 4100 if only: 4101 this.set("only", only) 4102 4103 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4104 self._match_text_seq("*") 4105 4106 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4107 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4108 this.set("partition", self._parse_partition()) 4109 4110 if schema: 4111 return self._parse_schema(this=this) 4112 4113 version = self._parse_version() 4114 4115 if version: 4116 this.set("version", version) 4117 4118 if self.dialect.ALIAS_POST_TABLESAMPLE: 4119 this.set("sample", self._parse_table_sample()) 4120 4121 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4122 if alias: 4123 this.set("alias", alias) 4124 4125 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4126 return self.expression( 4127 exp.AtIndex, this=this.to_column(copy=False), 
expression=self._parse_id_var() 4128 ) 4129 4130 this.set("hints", self._parse_table_hints()) 4131 4132 if not this.args.get("pivots"): 4133 this.set("pivots", self._parse_pivots()) 4134 4135 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4136 this.set("sample", self._parse_table_sample()) 4137 4138 if joins: 4139 for join in self._parse_joins(): 4140 this.append("joins", join) 4141 4142 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4143 this.set("ordinality", True) 4144 this.set("alias", self._parse_table_alias()) 4145 4146 return this 4147 4148 def _parse_version(self) -> t.Optional[exp.Version]: 4149 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4150 this = "TIMESTAMP" 4151 elif self._match(TokenType.VERSION_SNAPSHOT): 4152 this = "VERSION" 4153 else: 4154 return None 4155 4156 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4157 kind = self._prev.text.upper() 4158 start = self._parse_bitwise() 4159 self._match_texts(("TO", "AND")) 4160 end = self._parse_bitwise() 4161 expression: t.Optional[exp.Expression] = self.expression( 4162 exp.Tuple, expressions=[start, end] 4163 ) 4164 elif self._match_text_seq("CONTAINED", "IN"): 4165 kind = "CONTAINED IN" 4166 expression = self.expression( 4167 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4168 ) 4169 elif self._match(TokenType.ALL): 4170 kind = "ALL" 4171 expression = None 4172 else: 4173 self._match_text_seq("AS", "OF") 4174 kind = "AS OF" 4175 expression = self._parse_type() 4176 4177 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4178 4179 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4180 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4181 index = self._index 4182 historical_data = None 4183 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4184 this = self._prev.text.upper() 4185 kind = ( 4186 self._match(TokenType.L_PAREN) 4187 and self._match_texts(self.HISTORICAL_DATA_KIND) 4188 and self._prev.text.upper() 4189 ) 4190 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4191 4192 if expression: 4193 self._match_r_paren() 4194 historical_data = self.expression( 4195 exp.HistoricalData, this=this, kind=kind, expression=expression 4196 ) 4197 else: 4198 self._retreat(index) 4199 4200 return historical_data 4201 4202 def _parse_changes(self) -> t.Optional[exp.Changes]: 4203 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4204 return None 4205 4206 information = self._parse_var(any_token=True) 4207 self._match_r_paren() 4208 4209 return self.expression( 4210 exp.Changes, 4211 information=information, 4212 at_before=self._parse_historical_data(), 4213 end=self._parse_historical_data(), 4214 ) 4215 4216 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4217 if not self._match(TokenType.UNNEST): 4218 return None 4219 4220 expressions = self._parse_wrapped_csv(self._parse_equality) 4221 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4222 4223 alias = self._parse_table_alias() if with_alias else None 4224 4225 if alias: 4226 if self.dialect.UNNEST_COLUMN_ONLY: 4227 if alias.args.get("columns"): 4228 self.raise_error("Unexpected extra column alias in unnest.") 4229 4230 alias.set("columns", [alias.this]) 4231 alias.set("this", None) 4232 4233 columns = alias.args.get("columns") or [] 4234 if offset and len(expressions) < len(columns): 4235 offset = columns.pop() 4236 4237 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4238 
self._match(TokenType.ALIAS) 4239 offset = self._parse_id_var( 4240 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4241 ) or exp.to_identifier("offset") 4242 4243 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4244 4245 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4246 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4247 if not is_derived and not ( 4248 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4249 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4250 ): 4251 return None 4252 4253 expressions = self._parse_csv(self._parse_value) 4254 alias = self._parse_table_alias() 4255 4256 if is_derived: 4257 self._match_r_paren() 4258 4259 return self.expression( 4260 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4261 ) 4262 4263 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4264 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4265 as_modifier and self._match_text_seq("USING", "SAMPLE") 4266 ): 4267 return None 4268 4269 bucket_numerator = None 4270 bucket_denominator = None 4271 bucket_field = None 4272 percent = None 4273 size = None 4274 seed = None 4275 4276 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4277 matched_l_paren = self._match(TokenType.L_PAREN) 4278 4279 if self.TABLESAMPLE_CSV: 4280 num = None 4281 expressions = self._parse_csv(self._parse_primary) 4282 else: 4283 expressions = None 4284 num = ( 4285 self._parse_factor() 4286 if self._match(TokenType.NUMBER, advance=False) 4287 else self._parse_primary() or self._parse_placeholder() 4288 ) 4289 4290 if self._match_text_seq("BUCKET"): 4291 bucket_numerator = self._parse_number() 4292 self._match_text_seq("OUT", "OF") 4293 bucket_denominator = self._parse_number() 4294 self._match(TokenType.ON) 4295 bucket_field = self._parse_field() 4296 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4297 percent = num 4298 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4299 size = num 4300 else: 4301 percent = num 4302 4303 if matched_l_paren: 4304 self._match_r_paren() 4305 4306 if self._match(TokenType.L_PAREN): 4307 method = self._parse_var(upper=True) 4308 seed = self._match(TokenType.COMMA) and self._parse_number() 4309 self._match_r_paren() 4310 elif self._match_texts(("SEED", "REPEATABLE")): 4311 seed = self._parse_wrapped(self._parse_number) 4312 4313 if not method and self.DEFAULT_SAMPLING_METHOD: 4314 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4315 4316 return self.expression( 4317 exp.TableSample, 4318 expressions=expressions, 4319 method=method, 4320 bucket_numerator=bucket_numerator, 4321 bucket_denominator=bucket_denominator, 4322 bucket_field=bucket_field, 4323 percent=percent, 4324 size=size, 4325 seed=seed, 4326 ) 4327 4328 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4329 return list(iter(self._parse_pivot, None)) or None 4330 4331 def _parse_joins(self) -> t.Iterator[exp.Join]: 4332 return iter(self._parse_join, None) 4333 4334 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4335 if not self._match(TokenType.INTO): 4336 return None 4337 4338 return self.expression( 4339 exp.UnpivotColumns, 4340 this=self._match_text_seq("NAME") and self._parse_column(), 4341 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4342 ) 4343 4344 # https://duckdb.org/docs/sql/statements/pivot 4345
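    # Editor's illustration (not part of the original source): the statement-level, simplified
    # syntax handled by the method below is DuckDB's
    #
    #     PIVOT cities ON year USING SUM(population) GROUP BY country
    #
    # A minimal usage sketch, assuming the duckdb dialect routes top-level PIVOT/UNPIVOT
    # statements to this parser:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("PIVOT cities ON year USING SUM(population)", read="duckdb")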
def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4346 def _parse_on() -> t.Optional[exp.Expression]: 4347 this = self._parse_bitwise() 4348 4349 if self._match(TokenType.IN): 4350 # PIVOT ... ON col IN (row_val1, row_val2) 4351 return self._parse_in(this) 4352 if self._match(TokenType.ALIAS, advance=False): 4353 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4354 return self._parse_alias(this) 4355 4356 return this 4357 4358 this = self._parse_table() 4359 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4360 into = self._parse_unpivot_columns() 4361 using = self._match(TokenType.USING) and self._parse_csv( 4362 lambda: self._parse_alias(self._parse_function()) 4363 ) 4364 group = self._parse_group() 4365 4366 return self.expression( 4367 exp.Pivot, 4368 this=this, 4369 expressions=expressions, 4370 using=using, 4371 group=group, 4372 unpivot=is_unpivot, 4373 into=into, 4374 ) 4375 4376 def _parse_pivot_in(self) -> exp.In: 4377 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4378 this = self._parse_select_or_expression() 4379 4380 self._match(TokenType.ALIAS) 4381 alias = self._parse_bitwise() 4382 if alias: 4383 if isinstance(alias, exp.Column) and not alias.db: 4384 alias = alias.this 4385 return self.expression(exp.PivotAlias, this=this, alias=alias) 4386 4387 return this 4388 4389 value = self._parse_column() 4390 4391 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4392 self.raise_error("Expecting IN (") 4393 4394 if self._match(TokenType.ANY): 4395 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4396 else: 4397 exprs = self._parse_csv(_parse_aliased_expression) 4398 4399 self._match_r_paren() 4400 return self.expression(exp.In, this=value, expressions=exprs) 4401 4402 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4403 func = self._parse_function() 4404 if not func: 4405 self.raise_error("Expecting an aggregation function in PIVOT") 4406 4407 return self._parse_alias(func) 4408 4409 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4410 index = self._index 4411 include_nulls = None 4412 4413 if self._match(TokenType.PIVOT): 4414 unpivot = False 4415 elif self._match(TokenType.UNPIVOT): 4416 unpivot = True 4417 4418 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4419 if self._match_text_seq("INCLUDE", "NULLS"): 4420 include_nulls = True 4421 elif self._match_text_seq("EXCLUDE", "NULLS"): 4422 include_nulls = False 4423 else: 4424 return None 4425 4426 expressions = [] 4427 4428 if not self._match(TokenType.L_PAREN): 4429 self._retreat(index) 4430 return None 4431 4432 if unpivot: 4433 expressions = self._parse_csv(self._parse_column) 4434 else: 4435 expressions = self._parse_csv(self._parse_pivot_aggregation) 4436 4437 if not expressions: 4438 self.raise_error("Failed to parse PIVOT's aggregation list") 4439 4440 if not self._match(TokenType.FOR): 4441 self.raise_error("Expecting FOR") 4442 4443 fields = [] 4444 while True: 4445 field = self._try_parse(self._parse_pivot_in) 4446 if not field: 4447 break 4448 fields.append(field) 4449 4450 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4451 self._parse_bitwise 4452 ) 4453 4454 group = self._parse_group() 4455 4456 self._match_r_paren() 4457 4458 pivot = self.expression( 4459 exp.Pivot, 4460 expressions=expressions, 4461 fields=fields, 4462 unpivot=unpivot, 4463 include_nulls=include_nulls, 4464 
default_on_null=default_on_null, 4465 group=group, 4466 ) 4467 4468 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4469 pivot.set("alias", self._parse_table_alias()) 4470 4471 if not unpivot: 4472 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4473 4474 columns: t.List[exp.Expression] = [] 4475 all_fields = [] 4476 for pivot_field in pivot.fields: 4477 pivot_field_expressions = pivot_field.expressions 4478 4479 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4480 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4481 continue 4482 4483 all_fields.append( 4484 [ 4485 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4486 for fld in pivot_field_expressions 4487 ] 4488 ) 4489 4490 if all_fields: 4491 if names: 4492 all_fields.append(names) 4493 4494 # Generate all possible combinations of the pivot columns 4495 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4496 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4497 for fld_parts_tuple in itertools.product(*all_fields): 4498 fld_parts = list(fld_parts_tuple) 4499 4500 if names and self.PREFIXED_PIVOT_COLUMNS: 4501 # Move the "name" to the front of the list 4502 fld_parts.insert(0, fld_parts.pop(-1)) 4503 4504 columns.append(exp.to_identifier("_".join(fld_parts))) 4505 4506 pivot.set("columns", columns) 4507 4508 return pivot 4509 4510 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4511 return [agg.alias for agg in aggregations if agg.alias] 4512 4513 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4514 if not skip_where_token and not self._match(TokenType.PREWHERE): 4515 return None 4516 4517 return self.expression( 4518 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4519 ) 4520 4521 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4522 if not skip_where_token and not self._match(TokenType.WHERE): 4523 return None 4524 4525 return self.expression( 4526 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4527 ) 4528 4529 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4530 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4531 return None 4532 comments = self._prev_comments 4533 4534 elements: t.Dict[str, t.Any] = defaultdict(list) 4535 4536 if self._match(TokenType.ALL): 4537 elements["all"] = True 4538 elif self._match(TokenType.DISTINCT): 4539 elements["all"] = False 4540 4541 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4542 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4543 4544 while True: 4545 index = self._index 4546 4547 elements["expressions"].extend( 4548 self._parse_csv( 4549 lambda: None 4550 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4551 else self._parse_assignment() 4552 ) 4553 ) 4554 4555 before_with_index = self._index 4556 with_prefix = self._match(TokenType.WITH) 4557 4558 if self._match(TokenType.ROLLUP): 4559 elements["rollup"].append( 4560 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4561 ) 4562 elif self._match(TokenType.CUBE): 4563 elements["cube"].append( 4564 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4565 ) 4566 elif self._match(TokenType.GROUPING_SETS): 4567 elements["grouping_sets"].append( 4568 
self.expression( 4569 exp.GroupingSets, 4570 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4571 ) 4572 ) 4573 elif self._match_text_seq("TOTALS"): 4574 elements["totals"] = True # type: ignore 4575 4576 if before_with_index <= self._index <= before_with_index + 1: 4577 self._retreat(before_with_index) 4578 break 4579 4580 if index == self._index: 4581 break 4582 4583 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4584 4585 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4586 return self.expression( 4587 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4588 ) 4589 4590 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4591 if self._match(TokenType.L_PAREN): 4592 grouping_set = self._parse_csv(self._parse_column) 4593 self._match_r_paren() 4594 return self.expression(exp.Tuple, expressions=grouping_set) 4595 4596 return self._parse_column() 4597 4598 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4599 if not skip_having_token and not self._match(TokenType.HAVING): 4600 return None 4601 return self.expression( 4602 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4603 ) 4604 4605 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4606 if not self._match(TokenType.QUALIFY): 4607 return None 4608 return self.expression(exp.Qualify, this=self._parse_assignment()) 4609 4610 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4611 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4612 exp.Prior, this=self._parse_bitwise() 4613 ) 4614 connect = self._parse_assignment() 4615 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4616 return connect 4617 4618 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4619 if skip_start_token: 4620 start = None 4621 elif self._match(TokenType.START_WITH): 4622 start = self._parse_assignment() 4623 else: 4624 return None 4625 4626 self._match(TokenType.CONNECT_BY) 4627 nocycle = self._match_text_seq("NOCYCLE") 4628 connect = self._parse_connect_with_prior() 4629 4630 if not start and self._match(TokenType.START_WITH): 4631 start = self._parse_assignment() 4632 4633 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4634 4635 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4636 this = self._parse_id_var(any_token=True) 4637 if self._match(TokenType.ALIAS): 4638 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4639 return this 4640 4641 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4642 if self._match_text_seq("INTERPOLATE"): 4643 return self._parse_wrapped_csv(self._parse_name_as_expression) 4644 return None 4645 4646 def _parse_order( 4647 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4648 ) -> t.Optional[exp.Expression]: 4649 siblings = None 4650 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4651 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4652 return this 4653 4654 siblings = True 4655 4656 return self.expression( 4657 exp.Order, 4658 comments=self._prev_comments, 4659 this=this, 4660 expressions=self._parse_csv(self._parse_ordered), 4661 siblings=siblings, 4662 ) 4663 4664 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4665 if not self._match(token): 4666 return None 4667 return self.expression(exp_class, 
expressions=self._parse_csv(self._parse_ordered)) 4668 4669 def _parse_ordered( 4670 self, parse_method: t.Optional[t.Callable] = None 4671 ) -> t.Optional[exp.Ordered]: 4672 this = parse_method() if parse_method else self._parse_assignment() 4673 if not this: 4674 return None 4675 4676 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4677 this = exp.var("ALL") 4678 4679 asc = self._match(TokenType.ASC) 4680 desc = self._match(TokenType.DESC) or (asc and False) 4681 4682 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4683 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4684 4685 nulls_first = is_nulls_first or False 4686 explicitly_null_ordered = is_nulls_first or is_nulls_last 4687 4688 if ( 4689 not explicitly_null_ordered 4690 and ( 4691 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4692 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4693 ) 4694 and self.dialect.NULL_ORDERING != "nulls_are_last" 4695 ): 4696 nulls_first = True 4697 4698 if self._match_text_seq("WITH", "FILL"): 4699 with_fill = self.expression( 4700 exp.WithFill, 4701 **{ # type: ignore 4702 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4703 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4704 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4705 "interpolate": self._parse_interpolate(), 4706 }, 4707 ) 4708 else: 4709 with_fill = None 4710 4711 return self.expression( 4712 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4713 ) 4714 4715 def _parse_limit_options(self) -> exp.LimitOptions: 4716 percent = self._match(TokenType.PERCENT) 4717 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4718 self._match_text_seq("ONLY") 4719 with_ties = self._match_text_seq("WITH", "TIES") 4720 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4721 4722 def _parse_limit( 4723 self, 4724 this: t.Optional[exp.Expression] = None, 4725 top: bool = False, 4726 skip_limit_token: bool = False, 4727 ) -> t.Optional[exp.Expression]: 4728 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4729 comments = self._prev_comments 4730 if top: 4731 limit_paren = self._match(TokenType.L_PAREN) 4732 expression = self._parse_term() if limit_paren else self._parse_number() 4733 4734 if limit_paren: 4735 self._match_r_paren() 4736 4737 limit_options = self._parse_limit_options() 4738 else: 4739 limit_options = None 4740 expression = self._parse_term() 4741 4742 if self._match(TokenType.COMMA): 4743 offset = expression 4744 expression = self._parse_term() 4745 else: 4746 offset = None 4747 4748 limit_exp = self.expression( 4749 exp.Limit, 4750 this=this, 4751 expression=expression, 4752 offset=offset, 4753 comments=comments, 4754 limit_options=limit_options, 4755 expressions=self._parse_limit_by(), 4756 ) 4757 4758 return limit_exp 4759 4760 if self._match(TokenType.FETCH): 4761 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4762 direction = self._prev.text.upper() if direction else "FIRST" 4763 4764 count = self._parse_field(tokens=self.FETCH_TOKENS) 4765 4766 return self.expression( 4767 exp.Fetch, 4768 direction=direction, 4769 count=count, 4770 limit_options=self._parse_limit_options(), 4771 ) 4772 4773 return this 4774 4775 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4776 if not self._match(TokenType.OFFSET): 4777 return this 4778 4779 count = self._parse_term() 4780 
self._match_set((TokenType.ROW, TokenType.ROWS)) 4781 4782 return self.expression( 4783 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4784 ) 4785 4786 def _can_parse_limit_or_offset(self) -> bool: 4787 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4788 return False 4789 4790 index = self._index 4791 result = bool( 4792 self._try_parse(self._parse_limit, retreat=True) 4793 or self._try_parse(self._parse_offset, retreat=True) 4794 ) 4795 self._retreat(index) 4796 return result 4797 4798 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4799 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4800 4801 def _parse_locks(self) -> t.List[exp.Lock]: 4802 locks = [] 4803 while True: 4804 update, key = None, None 4805 if self._match_text_seq("FOR", "UPDATE"): 4806 update = True 4807 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4808 "LOCK", "IN", "SHARE", "MODE" 4809 ): 4810 update = False 4811 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4812 update, key = False, True 4813 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4814 update, key = True, True 4815 else: 4816 break 4817 4818 expressions = None 4819 if self._match_text_seq("OF"): 4820 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4821 4822 wait: t.Optional[bool | exp.Expression] = None 4823 if self._match_text_seq("NOWAIT"): 4824 wait = True 4825 elif self._match_text_seq("WAIT"): 4826 wait = self._parse_primary() 4827 elif self._match_text_seq("SKIP", "LOCKED"): 4828 wait = False 4829 4830 locks.append( 4831 self.expression( 4832 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4833 ) 4834 ) 4835 4836 return locks 4837 4838 def parse_set_operation( 4839 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4840 ) -> t.Optional[exp.Expression]: 4841 start = self._index 4842 _, side_token, kind_token = self._parse_join_parts() 4843 4844 side = side_token.text if side_token else None 4845 kind = kind_token.text if kind_token else None 4846 4847 if not self._match_set(self.SET_OPERATIONS): 4848 self._retreat(start) 4849 return None 4850 4851 token_type = self._prev.token_type 4852 4853 if token_type == TokenType.UNION: 4854 operation: t.Type[exp.SetOperation] = exp.Union 4855 elif token_type == TokenType.EXCEPT: 4856 operation = exp.Except 4857 else: 4858 operation = exp.Intersect 4859 4860 comments = self._prev.comments 4861 4862 if self._match(TokenType.DISTINCT): 4863 distinct: t.Optional[bool] = True 4864 elif self._match(TokenType.ALL): 4865 distinct = False 4866 else: 4867 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4868 if distinct is None: 4869 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4870 4871 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4872 "STRICT", "CORRESPONDING" 4873 ) 4874 if self._match_text_seq("CORRESPONDING"): 4875 by_name = True 4876 if not side and not kind: 4877 kind = "INNER" 4878 4879 on_column_list = None 4880 if by_name and self._match_texts(("ON", "BY")): 4881 on_column_list = self._parse_wrapped_csv(self._parse_column) 4882 4883 expression = self._parse_select( 4884 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4885 ) 4886 4887 return self.expression( 4888 operation, 4889 comments=comments, 4890 this=this, 4891 distinct=distinct, 4892 by_name=by_name, 4893 expression=expression, 4894 side=side, 4895 kind=kind, 4896 on=on_column_list, 4897 ) 4898 
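    # Editor's illustration (not part of the original source): _parse_set_operations below feeds
    # the accumulated expression back into parse_set_operation, so chained set operators nest
    # left-to-right, e.g. "SELECT 1 UNION SELECT 2 UNION SELECT 3" parses as
    # Union(Union(SELECT 1, SELECT 2), SELECT 3). A minimal round-trip sketch via the public
    # API, assuming the default dialect:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2").sql()
    #     'SELECT 1 UNION ALL SELECT 2'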
4899 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4900 while this: 4901 setop = self.parse_set_operation(this) 4902 if not setop: 4903 break 4904 this = setop 4905 4906 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4907 expression = this.expression 4908 4909 if expression: 4910 for arg in self.SET_OP_MODIFIERS: 4911 expr = expression.args.get(arg) 4912 if expr: 4913 this.set(arg, expr.pop()) 4914 4915 return this 4916 4917 def _parse_expression(self) -> t.Optional[exp.Expression]: 4918 return self._parse_alias(self._parse_assignment()) 4919 4920 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4921 this = self._parse_disjunction() 4922 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4923 # This allows us to parse <non-identifier token> := <expr> 4924 this = exp.column( 4925 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4926 ) 4927 4928 while self._match_set(self.ASSIGNMENT): 4929 if isinstance(this, exp.Column) and len(this.parts) == 1: 4930 this = this.this 4931 4932 this = self.expression( 4933 self.ASSIGNMENT[self._prev.token_type], 4934 this=this, 4935 comments=self._prev_comments, 4936 expression=self._parse_assignment(), 4937 ) 4938 4939 return this 4940 4941 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4942 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4943 4944 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4945 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4946 4947 def _parse_equality(self) -> t.Optional[exp.Expression]: 4948 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4949 4950 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4951 return self._parse_tokens(self._parse_range, self.COMPARISON) 4952 4953 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4954 this = this or self._parse_bitwise() 4955 negate = self._match(TokenType.NOT) 4956 4957 if self._match_set(self.RANGE_PARSERS): 4958 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4959 if not expression: 4960 return this 4961 4962 this = expression 4963 elif self._match(TokenType.ISNULL): 4964 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4965 4966 # Postgres supports ISNULL and NOTNULL for conditions. 
4967 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4968 if self._match(TokenType.NOTNULL): 4969 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4970 this = self.expression(exp.Not, this=this) 4971 4972 if negate: 4973 this = self._negate_range(this) 4974 4975 if self._match(TokenType.IS): 4976 this = self._parse_is(this) 4977 4978 return this 4979 4980 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4981 if not this: 4982 return this 4983 4984 return self.expression(exp.Not, this=this) 4985 4986 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4987 index = self._index - 1 4988 negate = self._match(TokenType.NOT) 4989 4990 if self._match_text_seq("DISTINCT", "FROM"): 4991 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4992 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4993 4994 if self._match(TokenType.JSON): 4995 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4996 4997 if self._match_text_seq("WITH"): 4998 _with = True 4999 elif self._match_text_seq("WITHOUT"): 5000 _with = False 5001 else: 5002 _with = None 5003 5004 unique = self._match(TokenType.UNIQUE) 5005 self._match_text_seq("KEYS") 5006 expression: t.Optional[exp.Expression] = self.expression( 5007 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5008 ) 5009 else: 5010 expression = self._parse_primary() or self._parse_null() 5011 if not expression: 5012 self._retreat(index) 5013 return None 5014 5015 this = self.expression(exp.Is, this=this, expression=expression) 5016 return self.expression(exp.Not, this=this) if negate else this 5017 5018 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5019 unnest = self._parse_unnest(with_alias=False) 5020 if unnest: 5021 this = self.expression(exp.In, this=this, unnest=unnest) 5022 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5023 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5024 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5025 5026 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5027 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5028 else: 5029 this = self.expression(exp.In, this=this, expressions=expressions) 5030 5031 if matched_l_paren: 5032 self._match_r_paren(this) 5033 elif not self._match(TokenType.R_BRACKET, expression=this): 5034 self.raise_error("Expecting ]") 5035 else: 5036 this = self.expression(exp.In, this=this, field=self._parse_column()) 5037 5038 return this 5039 5040 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5041 symmetric = None 5042 if self._match_text_seq("SYMMETRIC"): 5043 symmetric = True 5044 elif self._match_text_seq("ASYMMETRIC"): 5045 symmetric = False 5046 5047 low = self._parse_bitwise() 5048 self._match(TokenType.AND) 5049 high = self._parse_bitwise() 5050 5051 return self.expression( 5052 exp.Between, 5053 this=this, 5054 low=low, 5055 high=high, 5056 symmetric=symmetric, 5057 ) 5058 5059 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5060 if not self._match(TokenType.ESCAPE): 5061 return this 5062 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5063 5064 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5065 index = self._index 5066 5067 if not 
self._match(TokenType.INTERVAL) and match_interval: 5068 return None 5069 5070 if self._match(TokenType.STRING, advance=False): 5071 this = self._parse_primary() 5072 else: 5073 this = self._parse_term() 5074 5075 if not this or ( 5076 isinstance(this, exp.Column) 5077 and not this.table 5078 and not this.this.quoted 5079 and this.name.upper() == "IS" 5080 ): 5081 self._retreat(index) 5082 return None 5083 5084 unit = self._parse_function() or ( 5085 not self._match(TokenType.ALIAS, advance=False) 5086 and self._parse_var(any_token=True, upper=True) 5087 ) 5088 5089 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5090 # each INTERVAL expression into this canonical form so it's easy to transpile 5091 if this and this.is_number: 5092 this = exp.Literal.string(this.to_py()) 5093 elif this and this.is_string: 5094 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5095 if parts and unit: 5096 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5097 unit = None 5098 self._retreat(self._index - 1) 5099 5100 if len(parts) == 1: 5101 this = exp.Literal.string(parts[0][0]) 5102 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5103 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5104 unit = self.expression( 5105 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5106 ) 5107 5108 interval = self.expression(exp.Interval, this=this, unit=unit) 5109 5110 index = self._index 5111 self._match(TokenType.PLUS) 5112 5113 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5114 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5115 return self.expression( 5116 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5117 ) 5118 5119 self._retreat(index) 5120 return interval 5121 5122 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5123 this = self._parse_term() 5124 5125 while True: 5126 if self._match_set(self.BITWISE): 5127 this = self.expression( 5128 self.BITWISE[self._prev.token_type], 5129 this=this, 5130 expression=self._parse_term(), 5131 ) 5132 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5133 this = self.expression( 5134 exp.DPipe, 5135 this=this, 5136 expression=self._parse_term(), 5137 safe=not self.dialect.STRICT_STRING_CONCAT, 5138 ) 5139 elif self._match(TokenType.DQMARK): 5140 this = self.expression( 5141 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5142 ) 5143 elif self._match_pair(TokenType.LT, TokenType.LT): 5144 this = self.expression( 5145 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5146 ) 5147 elif self._match_pair(TokenType.GT, TokenType.GT): 5148 this = self.expression( 5149 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5150 ) 5151 else: 5152 break 5153 5154 return this 5155 5156 def _parse_term(self) -> t.Optional[exp.Expression]: 5157 this = self._parse_factor() 5158 5159 while self._match_set(self.TERM): 5160 klass = self.TERM[self._prev.token_type] 5161 comments = self._prev_comments 5162 expression = self._parse_factor() 5163 5164 this = self.expression(klass, this=this, comments=comments, expression=expression) 5165 5166 if isinstance(this, exp.Collate): 5167 expr = this.expression 5168 5169 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5170 # fallback to Identifier / Var 5171 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5172 ident = expr.this 5173 if 
isinstance(ident, exp.Identifier): 5174 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5175 5176 return this 5177 5178 def _parse_factor(self) -> t.Optional[exp.Expression]: 5179 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5180 this = parse_method() 5181 5182 while self._match_set(self.FACTOR): 5183 klass = self.FACTOR[self._prev.token_type] 5184 comments = self._prev_comments 5185 expression = parse_method() 5186 5187 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5188 self._retreat(self._index - 1) 5189 return this 5190 5191 this = self.expression(klass, this=this, comments=comments, expression=expression) 5192 5193 if isinstance(this, exp.Div): 5194 this.args["typed"] = self.dialect.TYPED_DIVISION 5195 this.args["safe"] = self.dialect.SAFE_DIVISION 5196 5197 return this 5198 5199 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5200 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5201 5202 def _parse_unary(self) -> t.Optional[exp.Expression]: 5203 if self._match_set(self.UNARY_PARSERS): 5204 return self.UNARY_PARSERS[self._prev.token_type](self) 5205 return self._parse_at_time_zone(self._parse_type()) 5206 5207 def _parse_type( 5208 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5209 ) -> t.Optional[exp.Expression]: 5210 interval = parse_interval and self._parse_interval() 5211 if interval: 5212 return interval 5213 5214 index = self._index 5215 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5216 5217 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5218 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5219 if isinstance(data_type, exp.Cast): 5220 # This constructor can contain ops directly after it, for instance struct unnesting: 5221 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5222 return self._parse_column_ops(data_type) 5223 5224 if data_type: 5225 index2 = self._index 5226 this = self._parse_primary() 5227 5228 if isinstance(this, exp.Literal): 5229 literal = this.name 5230 this = self._parse_column_ops(this) 5231 5232 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5233 if parser: 5234 return parser(self, this, data_type) 5235 5236 if ( 5237 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5238 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5239 and TIME_ZONE_RE.search(literal) 5240 ): 5241 data_type = exp.DataType.build("TIMESTAMPTZ") 5242 5243 return self.expression(exp.Cast, this=this, to=data_type) 5244 5245 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5246 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5247 # 5248 # If the index difference here is greater than 1, that means the parser itself must have 5249 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5250 # 5251 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5252 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5253 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5254 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5255 # 5256 # In these cases, we don't really want to return the converted type, but instead retreat 5257 # and try to parse a Column or Identifier in the section below.
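# Illustrative walk-through of the comment above (a sketch added for clarity, not exhaustive):
# - explicit DECIMAL(38, 0) in the SQL: _parse_types consumes the tokens DECIMAL ( 38 , 0 ), so
#   index2 - index is 6 (> 1) and the branch below keeps the parsed DataType
# - bare DECIMAL expanded by a TYPE_CONVERTERS callable: only the DECIMAL keyword was consumed,
#   the difference stays at 1, so we retreat and parse a Column / Identifier instead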
5258 if data_type.expressions and index2 - index > 1: 5259 self._retreat(index2) 5260 return self._parse_column_ops(data_type) 5261 5262 self._retreat(index) 5263 5264 if fallback_to_identifier: 5265 return self._parse_id_var() 5266 5267 this = self._parse_column() 5268 return this and self._parse_column_ops(this) 5269 5270 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5271 this = self._parse_type() 5272 if not this: 5273 return None 5274 5275 if isinstance(this, exp.Column) and not this.table: 5276 this = exp.var(this.name.upper()) 5277 5278 return self.expression( 5279 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5280 ) 5281 5282 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5283 type_name = identifier.name 5284 5285 while self._match(TokenType.DOT): 5286 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5287 5288 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5289 5290 def _parse_types( 5291 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5292 ) -> t.Optional[exp.Expression]: 5293 index = self._index 5294 5295 this: t.Optional[exp.Expression] = None 5296 prefix = self._match_text_seq("SYSUDTLIB", ".") 5297 5298 if not self._match_set(self.TYPE_TOKENS): 5299 identifier = allow_identifiers and self._parse_id_var( 5300 any_token=False, tokens=(TokenType.VAR,) 5301 ) 5302 if isinstance(identifier, exp.Identifier): 5303 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5304 5305 if len(tokens) != 1: 5306 self.raise_error("Unexpected identifier", self._prev) 5307 5308 if tokens[0].token_type in self.TYPE_TOKENS: 5309 self._prev = tokens[0] 5310 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5311 this = self._parse_user_defined_type(identifier) 5312 else: 5313 self._retreat(self._index - 1) 5314 return None 5315 else: 5316 return None 5317 5318 type_token = self._prev.token_type 5319 5320 if type_token == TokenType.PSEUDO_TYPE: 5321 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5322 5323 if type_token == TokenType.OBJECT_IDENTIFIER: 5324 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5325 5326 # https://materialize.com/docs/sql/types/map/ 5327 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5328 key_type = self._parse_types( 5329 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5330 ) 5331 if not self._match(TokenType.FARROW): 5332 self._retreat(index) 5333 return None 5334 5335 value_type = self._parse_types( 5336 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5337 ) 5338 if not self._match(TokenType.R_BRACKET): 5339 self._retreat(index) 5340 return None 5341 5342 return exp.DataType( 5343 this=exp.DataType.Type.MAP, 5344 expressions=[key_type, value_type], 5345 nested=True, 5346 prefix=prefix, 5347 ) 5348 5349 nested = type_token in self.NESTED_TYPE_TOKENS 5350 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5351 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5352 expressions = None 5353 maybe_func = False 5354 5355 if self._match(TokenType.L_PAREN): 5356 if is_struct: 5357 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5358 elif nested: 5359 expressions = self._parse_csv( 5360 lambda: self._parse_types( 5361 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5362 ) 5363 ) 5364 if type_token == TokenType.NULLABLE 
and len(expressions) == 1: 5365 this = expressions[0] 5366 this.set("nullable", True) 5367 self._match_r_paren() 5368 return this 5369 elif type_token in self.ENUM_TYPE_TOKENS: 5370 expressions = self._parse_csv(self._parse_equality) 5371 elif is_aggregate: 5372 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5373 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5374 ) 5375 if not func_or_ident: 5376 return None 5377 expressions = [func_or_ident] 5378 if self._match(TokenType.COMMA): 5379 expressions.extend( 5380 self._parse_csv( 5381 lambda: self._parse_types( 5382 check_func=check_func, 5383 schema=schema, 5384 allow_identifiers=allow_identifiers, 5385 ) 5386 ) 5387 ) 5388 else: 5389 expressions = self._parse_csv(self._parse_type_size) 5390 5391 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5392 if type_token == TokenType.VECTOR and len(expressions) == 2: 5393 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5394 5395 if not expressions or not self._match(TokenType.R_PAREN): 5396 self._retreat(index) 5397 return None 5398 5399 maybe_func = True 5400 5401 values: t.Optional[t.List[exp.Expression]] = None 5402 5403 if nested and self._match(TokenType.LT): 5404 if is_struct: 5405 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5406 else: 5407 expressions = self._parse_csv( 5408 lambda: self._parse_types( 5409 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5410 ) 5411 ) 5412 5413 if not self._match(TokenType.GT): 5414 self.raise_error("Expecting >") 5415 5416 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5417 values = self._parse_csv(self._parse_assignment) 5418 if not values and is_struct: 5419 values = None 5420 self._retreat(self._index - 1) 5421 else: 5422 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5423 5424 if type_token in self.TIMESTAMPS: 5425 if self._match_text_seq("WITH", "TIME", "ZONE"): 5426 maybe_func = False 5427 tz_type = ( 5428 exp.DataType.Type.TIMETZ 5429 if type_token in self.TIMES 5430 else exp.DataType.Type.TIMESTAMPTZ 5431 ) 5432 this = exp.DataType(this=tz_type, expressions=expressions) 5433 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5434 maybe_func = False 5435 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5436 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5437 maybe_func = False 5438 elif type_token == TokenType.INTERVAL: 5439 unit = self._parse_var(upper=True) 5440 if unit: 5441 if self._match_text_seq("TO"): 5442 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5443 5444 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5445 else: 5446 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5447 elif type_token == TokenType.VOID: 5448 this = exp.DataType(this=exp.DataType.Type.NULL) 5449 5450 if maybe_func and check_func: 5451 index2 = self._index 5452 peek = self._parse_string() 5453 5454 if not peek: 5455 self._retreat(index) 5456 return None 5457 5458 self._retreat(index2) 5459 5460 if not this: 5461 if self._match_text_seq("UNSIGNED"): 5462 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5463 if not unsigned_type_token: 5464 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5465 5466 type_token = unsigned_type_token or type_token 5467 5468 this = exp.DataType( 5469 this=exp.DataType.Type[type_token.value], 5470 
expressions=expressions, 5471 nested=nested, 5472 prefix=prefix, 5473 ) 5474 5475 # Empty arrays/structs are allowed 5476 if values is not None: 5477 cls = exp.Struct if is_struct else exp.Array 5478 this = exp.cast(cls(expressions=values), this, copy=False) 5479 5480 elif expressions: 5481 this.set("expressions", expressions) 5482 5483 # https://materialize.com/docs/sql/types/list/#type-name 5484 while self._match(TokenType.LIST): 5485 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5486 5487 index = self._index 5488 5489 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5490 matched_array = self._match(TokenType.ARRAY) 5491 5492 while self._curr: 5493 datatype_token = self._prev.token_type 5494 matched_l_bracket = self._match(TokenType.L_BRACKET) 5495 5496 if (not matched_l_bracket and not matched_array) or ( 5497 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5498 ): 5499 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5500 # not to be confused with the fixed size array parsing 5501 break 5502 5503 matched_array = False 5504 values = self._parse_csv(self._parse_assignment) or None 5505 if ( 5506 values 5507 and not schema 5508 and ( 5509 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5510 ) 5511 ): 5512 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5513 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5514 self._retreat(index) 5515 break 5516 5517 this = exp.DataType( 5518 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5519 ) 5520 self._match(TokenType.R_BRACKET) 5521 5522 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5523 converter = self.TYPE_CONVERTERS.get(this.this) 5524 if converter: 5525 this = converter(t.cast(exp.DataType, this)) 5526 5527 return this 5528 5529 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5530 index = self._index 5531 5532 if ( 5533 self._curr 5534 and self._next 5535 and self._curr.token_type in self.TYPE_TOKENS 5536 and self._next.token_type in self.TYPE_TOKENS 5537 ): 5538 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5539 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5540 this = self._parse_id_var() 5541 else: 5542 this = ( 5543 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5544 or self._parse_id_var() 5545 ) 5546 5547 self._match(TokenType.COLON) 5548 5549 if ( 5550 type_required 5551 and not isinstance(this, exp.DataType) 5552 and not self._match_set(self.TYPE_TOKENS, advance=False) 5553 ): 5554 self._retreat(index) 5555 return self._parse_types() 5556 5557 return self._parse_column_def(this) 5558 5559 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5560 if not self._match_text_seq("AT", "TIME", "ZONE"): 5561 return this 5562 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5563 5564 def _parse_column(self) -> t.Optional[exp.Expression]: 5565 this = self._parse_column_reference() 5566 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5567 5568 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5569 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5570 5571 return column 5572 5573 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5574 this = self._parse_field() 5575 if ( 5576 not this 5577 and self._match(TokenType.VALUES, advance=False) 5578 and self.VALUES_FOLLOWED_BY_PAREN 5579 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5580 ): 5581 this = self._parse_id_var() 5582 5583 if isinstance(this, exp.Identifier): 5584 # We bubble up comments from the Identifier to the Column 5585 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5586 5587 return this 5588 5589 def _parse_colon_as_variant_extract( 5590 self, this: t.Optional[exp.Expression] 5591 ) -> t.Optional[exp.Expression]: 5592 casts = [] 5593 json_path = [] 5594 escape = None 5595 5596 while self._match(TokenType.COLON): 5597 start_index = self._index 5598 5599 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5600 path = self._parse_column_ops( 5601 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5602 ) 5603 5604 # The cast :: operator has a lower precedence than the extraction operator :, so 5605 # we rearrange the AST appropriately to avoid casting the JSON path 5606 while isinstance(path, exp.Cast): 5607 casts.append(path.to) 5608 path = path.this 5609 5610 if casts: 5611 dcolon_offset = next( 5612 i 5613 for i, t in enumerate(self._tokens[start_index:]) 5614 if t.token_type == TokenType.DCOLON 5615 ) 5616 end_token = self._tokens[start_index + dcolon_offset - 1] 5617 else: 5618 end_token = self._prev 5619 5620 if path: 5621 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5622 # it'll roundtrip to a string literal in GET_PATH 5623 if isinstance(path, exp.Identifier) and path.quoted: 5624 escape = True 5625 5626 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5627 5628 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5629 # Databricks transforms it back to the colon/dot notation 5630 if json_path: 5631 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5632 5633 if json_path_expr: 5634 json_path_expr.set("escape", escape) 5635 5636 this = self.expression( 5637 exp.JSONExtract, 5638 this=this, 5639 expression=json_path_expr, 5640 variant_extract=True, 5641 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5642 ) 5643 5644 while casts: 5645 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5646 5647 return this 5648 5649 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5650 return self._parse_types() 5651 5652 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5653 this = self._parse_bracket(this) 5654 5655 while self._match_set(self.COLUMN_OPERATORS): 5656 op_token = self._prev.token_type 5657 op = self.COLUMN_OPERATORS.get(op_token) 5658 5659 if op_token in self.CAST_COLUMN_OPERATORS: 5660 field = self._parse_dcolon() 5661 if not field: 5662 self.raise_error("Expected type") 5663 elif op and self._curr: 5664 field = self._parse_column_reference() or self._parse_bracket() 5665 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5666 field = self._parse_column_ops(field) 5667 else: 5668 field = self._parse_field(any_token=True, anonymous_func=True) 5669 5670 # Function calls can be qualified, e.g., x.y.FOO() 5671 # This converts the final AST to a series of Dots leading to the function call 5672 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5673 if isinstance(field, (exp.Func, exp.Window)) and this: 5674 this = this.transform( 5675 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5676 ) 5677 5678 if op: 5679 this = op(self, this, field) 5680 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5681 this = self.expression( 5682 exp.Column, 5683 comments=this.comments, 5684 this=field, 5685 table=this.this, 5686 db=this.args.get("table"), 5687 catalog=this.args.get("db"), 5688 ) 5689 elif isinstance(field, exp.Window): 5690 # Move the exp.Dot's to the window's function 5691 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5692 field.set("this", window_func) 5693 this = field 5694 else: 5695 this = self.expression(exp.Dot, this=this, expression=field) 5696 5697 if field and field.comments: 5698 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5699 5700 this = self._parse_bracket(this) 5701 5702 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5703 5704 def _parse_paren(self) -> t.Optional[exp.Expression]: 5705 if not self._match(TokenType.L_PAREN): 5706 return None 5707 5708 comments = self._prev_comments 5709 query = self._parse_select() 5710 5711 if query: 5712 expressions = [query] 5713 else: 5714 expressions = self._parse_expressions() 5715 5716 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5717 5718 if not this and self._match(TokenType.R_PAREN, advance=False): 5719 this = self.expression(exp.Tuple) 5720 elif isinstance(this, 
exp.UNWRAPPED_QUERIES): 5721 this = self._parse_subquery(this=this, parse_alias=False) 5722 elif isinstance(this, exp.Subquery): 5723 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5724 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5725 this = self.expression(exp.Tuple, expressions=expressions) 5726 else: 5727 this = self.expression(exp.Paren, this=this) 5728 5729 if this: 5730 this.add_comments(comments) 5731 5732 self._match_r_paren(expression=this) 5733 return this 5734 5735 def _parse_primary(self) -> t.Optional[exp.Expression]: 5736 if self._match_set(self.PRIMARY_PARSERS): 5737 token_type = self._prev.token_type 5738 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5739 5740 if token_type == TokenType.STRING: 5741 expressions = [primary] 5742 while self._match(TokenType.STRING): 5743 expressions.append(exp.Literal.string(self._prev.text)) 5744 5745 if len(expressions) > 1: 5746 return self.expression(exp.Concat, expressions=expressions) 5747 5748 return primary 5749 5750 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5751 return exp.Literal.number(f"0.{self._prev.text}") 5752 5753 return self._parse_paren() 5754 5755 def _parse_field( 5756 self, 5757 any_token: bool = False, 5758 tokens: t.Optional[t.Collection[TokenType]] = None, 5759 anonymous_func: bool = False, 5760 ) -> t.Optional[exp.Expression]: 5761 if anonymous_func: 5762 field = ( 5763 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5764 or self._parse_primary() 5765 ) 5766 else: 5767 field = self._parse_primary() or self._parse_function( 5768 anonymous=anonymous_func, any_token=any_token 5769 ) 5770 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5771 5772 def _parse_function( 5773 self, 5774 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5775 anonymous: bool = False, 5776 optional_parens: bool = True, 5777 any_token: bool = False, 5778 ) -> t.Optional[exp.Expression]: 5779 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5780 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5781 fn_syntax = False 5782 if ( 5783 self._match(TokenType.L_BRACE, advance=False) 5784 and self._next 5785 and self._next.text.upper() == "FN" 5786 ): 5787 self._advance(2) 5788 fn_syntax = True 5789 5790 func = self._parse_function_call( 5791 functions=functions, 5792 anonymous=anonymous, 5793 optional_parens=optional_parens, 5794 any_token=any_token, 5795 ) 5796 5797 if fn_syntax: 5798 self._match(TokenType.R_BRACE) 5799 5800 return func 5801 5802 def _parse_function_call( 5803 self, 5804 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5805 anonymous: bool = False, 5806 optional_parens: bool = True, 5807 any_token: bool = False, 5808 ) -> t.Optional[exp.Expression]: 5809 if not self._curr: 5810 return None 5811 5812 comments = self._curr.comments 5813 prev = self._prev 5814 token = self._curr 5815 token_type = self._curr.token_type 5816 this = self._curr.text 5817 upper = this.upper() 5818 5819 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5820 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5821 self._advance() 5822 return self._parse_window(parser(self)) 5823 5824 if not self._next or self._next.token_type != TokenType.L_PAREN: 5825 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5826 self._advance() 5827 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5828 5829 return None 5830 5831 if 
any_token: 5832 if token_type in self.RESERVED_TOKENS: 5833 return None 5834 elif token_type not in self.FUNC_TOKENS: 5835 return None 5836 5837 self._advance(2) 5838 5839 parser = self.FUNCTION_PARSERS.get(upper) 5840 if parser and not anonymous: 5841 this = parser(self) 5842 else: 5843 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5844 5845 if subquery_predicate: 5846 expr = None 5847 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5848 expr = self._parse_select() 5849 self._match_r_paren() 5850 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5851 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5852 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5853 self._advance(-1) 5854 expr = self._parse_bitwise() 5855 5856 if expr: 5857 return self.expression(subquery_predicate, comments=comments, this=expr) 5858 5859 if functions is None: 5860 functions = self.FUNCTIONS 5861 5862 function = functions.get(upper) 5863 known_function = function and not anonymous 5864 5865 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5866 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5867 5868 post_func_comments = self._curr and self._curr.comments 5869 if known_function and post_func_comments: 5870 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5871 # call we'll construct it as exp.Anonymous, even if it's "known" 5872 if any( 5873 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5874 for comment in post_func_comments 5875 ): 5876 known_function = False 5877 5878 if alias and known_function: 5879 args = self._kv_to_prop_eq(args) 5880 5881 if known_function: 5882 func_builder = t.cast(t.Callable, function) 5883 5884 if "dialect" in func_builder.__code__.co_varnames: 5885 func = func_builder(args, dialect=self.dialect) 5886 else: 5887 func = func_builder(args) 5888 5889 func = self.validate_expression(func, args) 5890 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5891 func.meta["name"] = this 5892 5893 this = func 5894 else: 5895 if token_type == TokenType.IDENTIFIER: 5896 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5897 5898 this = self.expression(exp.Anonymous, this=this, expressions=args) 5899 this = this.update_positions(token) 5900 5901 if isinstance(this, exp.Expression): 5902 this.add_comments(comments) 5903 5904 self._match_r_paren(this) 5905 return self._parse_window(this) 5906 5907 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5908 return expression 5909 5910 def _kv_to_prop_eq( 5911 self, expressions: t.List[exp.Expression], parse_map: bool = False 5912 ) -> t.List[exp.Expression]: 5913 transformed = [] 5914 5915 for index, e in enumerate(expressions): 5916 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5917 if isinstance(e, exp.Alias): 5918 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5919 5920 if not isinstance(e, exp.PropertyEQ): 5921 e = self.expression( 5922 exp.PropertyEQ, 5923 this=e.this if parse_map else exp.to_identifier(e.this.name), 5924 expression=e.expression, 5925 ) 5926 5927 if isinstance(e.this, exp.Column): 5928 e.this.replace(e.this.this) 5929 else: 5930 e = self._to_prop_eq(e, index) 5931 5932 transformed.append(e) 5933 5934 return transformed 5935 5936 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5937 return self._parse_statement() 5938 5939 def 
_parse_function_parameter(self) -> t.Optional[exp.Expression]: 5940 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5941 5942 def _parse_user_defined_function( 5943 self, kind: t.Optional[TokenType] = None 5944 ) -> t.Optional[exp.Expression]: 5945 this = self._parse_table_parts(schema=True) 5946 5947 if not self._match(TokenType.L_PAREN): 5948 return this 5949 5950 expressions = self._parse_csv(self._parse_function_parameter) 5951 self._match_r_paren() 5952 return self.expression( 5953 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5954 ) 5955 5956 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5957 literal = self._parse_primary() 5958 if literal: 5959 return self.expression(exp.Introducer, this=token.text, expression=literal) 5960 5961 return self._identifier_expression(token) 5962 5963 def _parse_session_parameter(self) -> exp.SessionParameter: 5964 kind = None 5965 this = self._parse_id_var() or self._parse_primary() 5966 5967 if this and self._match(TokenType.DOT): 5968 kind = this.name 5969 this = self._parse_var() or self._parse_primary() 5970 5971 return self.expression(exp.SessionParameter, this=this, kind=kind) 5972 5973 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5974 return self._parse_id_var() 5975 5976 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5977 index = self._index 5978 5979 if self._match(TokenType.L_PAREN): 5980 expressions = t.cast( 5981 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5982 ) 5983 5984 if not self._match(TokenType.R_PAREN): 5985 self._retreat(index) 5986 else: 5987 expressions = [self._parse_lambda_arg()] 5988 5989 if self._match_set(self.LAMBDAS): 5990 return self.LAMBDAS[self._prev.token_type](self, expressions) 5991 5992 self._retreat(index) 5993 5994 this: t.Optional[exp.Expression] 5995 5996 if self._match(TokenType.DISTINCT): 5997 this = self.expression( 5998 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5999 ) 6000 else: 6001 this = self._parse_select_or_expression(alias=alias) 6002 6003 return self._parse_limit( 6004 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6005 ) 6006 6007 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6008 index = self._index 6009 if not self._match(TokenType.L_PAREN): 6010 return this 6011 6012 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6013 # expr can be of both types 6014 if self._match_set(self.SELECT_START_TOKENS): 6015 self._retreat(index) 6016 return this 6017 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6018 self._match_r_paren() 6019 return self.expression(exp.Schema, this=this, expressions=args) 6020 6021 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6022 return self._parse_column_def(self._parse_field(any_token=True)) 6023 6024 def _parse_column_def( 6025 self, this: t.Optional[exp.Expression], computed_column: bool = True 6026 ) -> t.Optional[exp.Expression]: 6027 # column defs are not really columns, they're identifiers 6028 if isinstance(this, exp.Column): 6029 this = this.this 6030 6031 if not computed_column: 6032 self._match(TokenType.ALIAS) 6033 6034 kind = self._parse_types(schema=True) 6035 6036 if self._match_text_seq("FOR", "ORDINALITY"): 6037 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6038 6039 constraints: t.List[exp.Expression] = [] 6040 6041 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6042 ("ALIAS", "MATERIALIZED") 6043 ): 6044 persisted = self._prev.text.upper() == "MATERIALIZED" 6045 constraint_kind = exp.ComputedColumnConstraint( 6046 this=self._parse_assignment(), 6047 persisted=persisted or self._match_text_seq("PERSISTED"), 6048 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6049 ) 6050 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6051 elif ( 6052 kind 6053 and self._match(TokenType.ALIAS, advance=False) 6054 and ( 6055 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6056 or (self._next and self._next.token_type == TokenType.L_PAREN) 6057 ) 6058 ): 6059 self._advance() 6060 constraints.append( 6061 self.expression( 6062 exp.ColumnConstraint, 6063 kind=exp.ComputedColumnConstraint( 6064 this=self._parse_disjunction(), 6065 persisted=self._match_texts(("STORED", "VIRTUAL")) 6066 and self._prev.text.upper() == "STORED", 6067 ), 6068 ) 6069 ) 6070 6071 while True: 6072 constraint = self._parse_column_constraint() 6073 if not constraint: 6074 break 6075 constraints.append(constraint) 6076 6077 if not kind and not constraints: 6078 return this 6079 6080 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6081 6082 def _parse_auto_increment( 6083 self, 6084 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6085 start = None 6086 increment = None 6087 order = None 6088 6089 if self._match(TokenType.L_PAREN, advance=False): 6090 args = self._parse_wrapped_csv(self._parse_bitwise) 6091 start = seq_get(args, 0) 6092 increment = seq_get(args, 1) 6093 elif self._match_text_seq("START"): 6094 start = self._parse_bitwise() 6095 self._match_text_seq("INCREMENT") 6096 increment = self._parse_bitwise() 6097 if self._match_text_seq("ORDER"): 6098 order = True 6099 elif self._match_text_seq("NOORDER"): 6100 order = False 6101 6102 if start and increment: 6103 return exp.GeneratedAsIdentityColumnConstraint( 6104 start=start, increment=increment, this=False, order=order 6105 ) 6106 6107 return exp.AutoIncrementColumnConstraint() 6108 6109 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6110 if not self._match_text_seq("REFRESH"): 6111 self._retreat(self._index - 1) 6112 return None 6113 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6114 6115 def _parse_compress(self) -> exp.CompressColumnConstraint: 6116 if 
self._match(TokenType.L_PAREN, advance=False): 6117 return self.expression( 6118 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6119 ) 6120 6121 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6122 6123 def _parse_generated_as_identity( 6124 self, 6125 ) -> ( 6126 exp.GeneratedAsIdentityColumnConstraint 6127 | exp.ComputedColumnConstraint 6128 | exp.GeneratedAsRowColumnConstraint 6129 ): 6130 if self._match_text_seq("BY", "DEFAULT"): 6131 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6132 this = self.expression( 6133 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6134 ) 6135 else: 6136 self._match_text_seq("ALWAYS") 6137 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6138 6139 self._match(TokenType.ALIAS) 6140 6141 if self._match_text_seq("ROW"): 6142 start = self._match_text_seq("START") 6143 if not start: 6144 self._match(TokenType.END) 6145 hidden = self._match_text_seq("HIDDEN") 6146 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6147 6148 identity = self._match_text_seq("IDENTITY") 6149 6150 if self._match(TokenType.L_PAREN): 6151 if self._match(TokenType.START_WITH): 6152 this.set("start", self._parse_bitwise()) 6153 if self._match_text_seq("INCREMENT", "BY"): 6154 this.set("increment", self._parse_bitwise()) 6155 if self._match_text_seq("MINVALUE"): 6156 this.set("minvalue", self._parse_bitwise()) 6157 if self._match_text_seq("MAXVALUE"): 6158 this.set("maxvalue", self._parse_bitwise()) 6159 6160 if self._match_text_seq("CYCLE"): 6161 this.set("cycle", True) 6162 elif self._match_text_seq("NO", "CYCLE"): 6163 this.set("cycle", False) 6164 6165 if not identity: 6166 this.set("expression", self._parse_range()) 6167 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6168 args = self._parse_csv(self._parse_bitwise) 6169 this.set("start", seq_get(args, 0)) 6170 this.set("increment", seq_get(args, 1)) 6171 6172 self._match_r_paren() 6173 6174 return this 6175 6176 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6177 self._match_text_seq("LENGTH") 6178 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6179 6180 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6181 if self._match_text_seq("NULL"): 6182 return self.expression(exp.NotNullColumnConstraint) 6183 if self._match_text_seq("CASESPECIFIC"): 6184 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6185 if self._match_text_seq("FOR", "REPLICATION"): 6186 return self.expression(exp.NotForReplicationColumnConstraint) 6187 6188 # Unconsume the `NOT` token 6189 self._retreat(self._index - 1) 6190 return None 6191 6192 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6193 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6194 6195 procedure_option_follows = ( 6196 self._match(TokenType.WITH, advance=False) 6197 and self._next 6198 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6199 ) 6200 6201 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6202 return self.expression( 6203 exp.ColumnConstraint, 6204 this=this, 6205 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6206 ) 6207 6208 return this 6209 6210 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6211 if not self._match(TokenType.CONSTRAINT): 6212 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6213 6214 return self.expression( 6215 exp.Constraint, 6216 this=self._parse_id_var(), 6217 expressions=self._parse_unnamed_constraints(), 6218 ) 6219 6220 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6221 constraints = [] 6222 while True: 6223 constraint = self._parse_unnamed_constraint() or self._parse_function() 6224 if not constraint: 6225 break 6226 constraints.append(constraint) 6227 6228 return constraints 6229 6230 def _parse_unnamed_constraint( 6231 self, constraints: t.Optional[t.Collection[str]] = None 6232 ) -> t.Optional[exp.Expression]: 6233 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6234 constraints or self.CONSTRAINT_PARSERS 6235 ): 6236 return None 6237 6238 constraint = self._prev.text.upper() 6239 if constraint not in self.CONSTRAINT_PARSERS: 6240 self.raise_error(f"No parser found for schema constraint {constraint}.") 6241 6242 return self.CONSTRAINT_PARSERS[constraint](self) 6243 6244 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6245 return self._parse_id_var(any_token=False) 6246 6247 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6248 self._match_texts(("KEY", "INDEX")) 6249 return self.expression( 6250 exp.UniqueColumnConstraint, 6251 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6252 this=self._parse_schema(self._parse_unique_key()), 6253 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6254 on_conflict=self._parse_on_conflict(), 6255 options=self._parse_key_constraint_options(), 6256 ) 6257 6258 def _parse_key_constraint_options(self) -> t.List[str]: 6259 options = [] 6260 while True: 6261 if not self._curr: 6262 break 6263 6264 if self._match(TokenType.ON): 6265 action = None 6266 on = self._advance_any() and self._prev.text 6267 6268 if self._match_text_seq("NO", "ACTION"): 6269 action = "NO ACTION" 6270 elif self._match_text_seq("CASCADE"): 6271 action = "CASCADE" 6272 elif self._match_text_seq("RESTRICT"): 6273 action = "RESTRICT" 6274 elif self._match_pair(TokenType.SET, TokenType.NULL): 6275 action = "SET NULL" 6276 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6277 action = "SET DEFAULT" 6278 else: 6279 self.raise_error("Invalid key constraint") 6280 6281 options.append(f"ON {on} {action}") 6282 else: 6283 var = self._parse_var_from_options( 6284 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6285 ) 6286 if not var: 6287 break 6288 options.append(var.name) 6289 6290 return options 6291 6292 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6293 if match and not self._match(TokenType.REFERENCES): 6294 return None 6295 6296 expressions = None 6297 this = self._parse_table(schema=True) 6298 options = self._parse_key_constraint_options() 6299 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6300 6301 def _parse_foreign_key(self) -> exp.ForeignKey: 6302 expressions = ( 6303 self._parse_wrapped_id_vars() 6304 if not self._match(TokenType.REFERENCES, advance=False) 6305 else None 6306 ) 6307 reference = self._parse_references() 6308 on_options = {} 6309 6310 while self._match(TokenType.ON): 6311 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6312 self.raise_error("Expected DELETE or UPDATE") 6313 6314 kind = self._prev.text.lower() 6315 6316 if self._match_text_seq("NO", "ACTION"): 6317 action = "NO ACTION" 6318 elif self._match(TokenType.SET): 6319 
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6320 action = "SET " + self._prev.text.upper() 6321 else: 6322 self._advance() 6323 action = self._prev.text.upper() 6324 6325 on_options[kind] = action 6326 6327 return self.expression( 6328 exp.ForeignKey, 6329 expressions=expressions, 6330 reference=reference, 6331 options=self._parse_key_constraint_options(), 6332 **on_options, # type: ignore 6333 ) 6334 6335 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6336 return self._parse_ordered() or self._parse_field() 6337 6338 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6339 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6340 self._retreat(self._index - 1) 6341 return None 6342 6343 id_vars = self._parse_wrapped_id_vars() 6344 return self.expression( 6345 exp.PeriodForSystemTimeConstraint, 6346 this=seq_get(id_vars, 0), 6347 expression=seq_get(id_vars, 1), 6348 ) 6349 6350 def _parse_primary_key( 6351 self, wrapped_optional: bool = False, in_props: bool = False 6352 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6353 desc = ( 6354 self._match_set((TokenType.ASC, TokenType.DESC)) 6355 and self._prev.token_type == TokenType.DESC 6356 ) 6357 6358 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6359 return self.expression( 6360 exp.PrimaryKeyColumnConstraint, 6361 desc=desc, 6362 options=self._parse_key_constraint_options(), 6363 ) 6364 6365 expressions = self._parse_wrapped_csv( 6366 self._parse_primary_key_part, optional=wrapped_optional 6367 ) 6368 6369 return self.expression( 6370 exp.PrimaryKey, 6371 expressions=expressions, 6372 include=self._parse_index_params(), 6373 options=self._parse_key_constraint_options(), 6374 ) 6375 6376 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6377 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6378 6379 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6380 """ 6381 Parses a datetime column in ODBC format. We parse the column into the corresponding 6382 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6383 same as we did for `DATE('yyyy-mm-dd')`. 
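        The expression class to build is looked up in `ODBC_DATETIME_LITERALS`, keyed by the
        introducer that precedes the string, so e.g. `{ts 'yyyy-mm-dd hh:mm:ss'}` should be
        handled analogously.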
6384 6385 Reference: 6386 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6387 """ 6388 self._match(TokenType.VAR) 6389 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6390 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6391 if not self._match(TokenType.R_BRACE): 6392 self.raise_error("Expected }") 6393 return expression 6394 6395 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6396 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6397 return this 6398 6399 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6400 map_token = seq_get(self._tokens, self._index - 2) 6401 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6402 else: 6403 parse_map = False 6404 6405 bracket_kind = self._prev.token_type 6406 if ( 6407 bracket_kind == TokenType.L_BRACE 6408 and self._curr 6409 and self._curr.token_type == TokenType.VAR 6410 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6411 ): 6412 return self._parse_odbc_datetime_literal() 6413 6414 expressions = self._parse_csv( 6415 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6416 ) 6417 6418 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6419 self.raise_error("Expected ]") 6420 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6421 self.raise_error("Expected }") 6422 6423 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6424 if bracket_kind == TokenType.L_BRACE: 6425 this = self.expression( 6426 exp.Struct, 6427 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6428 ) 6429 elif not this: 6430 this = build_array_constructor( 6431 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6432 ) 6433 else: 6434 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6435 if constructor_type: 6436 return build_array_constructor( 6437 constructor_type, 6438 args=expressions, 6439 bracket_kind=bracket_kind, 6440 dialect=self.dialect, 6441 ) 6442 6443 expressions = apply_index_offset( 6444 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6445 ) 6446 this = self.expression( 6447 exp.Bracket, 6448 this=this, 6449 expressions=expressions, 6450 comments=this.pop_comments(), 6451 ) 6452 6453 self._add_comments(this) 6454 return self._parse_bracket(this) 6455 6456 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6457 if self._match(TokenType.COLON): 6458 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6459 return this 6460 6461 def _parse_case(self) -> t.Optional[exp.Expression]: 6462 ifs = [] 6463 default = None 6464 6465 comments = self._prev_comments 6466 expression = self._parse_assignment() 6467 6468 while self._match(TokenType.WHEN): 6469 this = self._parse_assignment() 6470 self._match(TokenType.THEN) 6471 then = self._parse_assignment() 6472 ifs.append(self.expression(exp.If, this=this, true=then)) 6473 6474 if self._match(TokenType.ELSE): 6475 default = self._parse_assignment() 6476 6477 if not self._match(TokenType.END): 6478 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6479 default = exp.column("interval") 6480 else: 6481 self.raise_error("Expected END after CASE", self._prev) 6482 6483 return self.expression( 6484 exp.Case, comments=comments, this=expression, ifs=ifs, 
default=default 6485 ) 6486 6487 def _parse_if(self) -> t.Optional[exp.Expression]: 6488 if self._match(TokenType.L_PAREN): 6489 args = self._parse_csv( 6490 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6491 ) 6492 this = self.validate_expression(exp.If.from_arg_list(args), args) 6493 self._match_r_paren() 6494 else: 6495 index = self._index - 1 6496 6497 if self.NO_PAREN_IF_COMMANDS and index == 0: 6498 return self._parse_as_command(self._prev) 6499 6500 condition = self._parse_assignment() 6501 6502 if not condition: 6503 self._retreat(index) 6504 return None 6505 6506 self._match(TokenType.THEN) 6507 true = self._parse_assignment() 6508 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6509 self._match(TokenType.END) 6510 this = self.expression(exp.If, this=condition, true=true, false=false) 6511 6512 return this 6513 6514 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6515 if not self._match_text_seq("VALUE", "FOR"): 6516 self._retreat(self._index - 1) 6517 return None 6518 6519 return self.expression( 6520 exp.NextValueFor, 6521 this=self._parse_column(), 6522 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6523 ) 6524 6525 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6526 this = self._parse_function() or self._parse_var_or_string(upper=True) 6527 6528 if self._match(TokenType.FROM): 6529 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6530 6531 if not self._match(TokenType.COMMA): 6532 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6533 6534 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6535 6536 def _parse_gap_fill(self) -> exp.GapFill: 6537 self._match(TokenType.TABLE) 6538 this = self._parse_table() 6539 6540 self._match(TokenType.COMMA) 6541 args = [this, *self._parse_csv(self._parse_lambda)] 6542 6543 gap_fill = exp.GapFill.from_arg_list(args) 6544 return self.validate_expression(gap_fill, args) 6545 6546 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6547 this = self._parse_assignment() 6548 6549 if not self._match(TokenType.ALIAS): 6550 if self._match(TokenType.COMMA): 6551 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6552 6553 self.raise_error("Expected AS after CAST") 6554 6555 fmt = None 6556 to = self._parse_types() 6557 6558 default = self._match(TokenType.DEFAULT) 6559 if default: 6560 default = self._parse_bitwise() 6561 self._match_text_seq("ON", "CONVERSION", "ERROR") 6562 6563 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6564 fmt_string = self._parse_string() 6565 fmt = self._parse_at_time_zone(fmt_string) 6566 6567 if not to: 6568 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6569 if to.this in exp.DataType.TEMPORAL_TYPES: 6570 this = self.expression( 6571 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6572 this=this, 6573 format=exp.Literal.string( 6574 format_time( 6575 fmt_string.this if fmt_string else "", 6576 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6577 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6578 ) 6579 ), 6580 safe=safe, 6581 ) 6582 6583 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6584 this.set("zone", fmt.args["zone"]) 6585 return this 6586 elif not to: 6587 self.raise_error("Expected TYPE after CAST") 6588 elif isinstance(to, exp.Identifier): 6589 to = exp.DataType.build(to.name, 
dialect=self.dialect, udt=True) 6590 elif to.this == exp.DataType.Type.CHAR: 6591 if self._match(TokenType.CHARACTER_SET): 6592 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6593 6594 return self.build_cast( 6595 strict=strict, 6596 this=this, 6597 to=to, 6598 format=fmt, 6599 safe=safe, 6600 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6601 default=default, 6602 ) 6603 6604 def _parse_string_agg(self) -> exp.GroupConcat: 6605 if self._match(TokenType.DISTINCT): 6606 args: t.List[t.Optional[exp.Expression]] = [ 6607 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6608 ] 6609 if self._match(TokenType.COMMA): 6610 args.extend(self._parse_csv(self._parse_assignment)) 6611 else: 6612 args = self._parse_csv(self._parse_assignment) # type: ignore 6613 6614 if self._match_text_seq("ON", "OVERFLOW"): 6615 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6616 if self._match_text_seq("ERROR"): 6617 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6618 else: 6619 self._match_text_seq("TRUNCATE") 6620 on_overflow = self.expression( 6621 exp.OverflowTruncateBehavior, 6622 this=self._parse_string(), 6623 with_count=( 6624 self._match_text_seq("WITH", "COUNT") 6625 or not self._match_text_seq("WITHOUT", "COUNT") 6626 ), 6627 ) 6628 else: 6629 on_overflow = None 6630 6631 index = self._index 6632 if not self._match(TokenType.R_PAREN) and args: 6633 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6634 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6635 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6636 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6637 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6638 6639 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6640 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6641 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
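        # Illustrative examples of the shapes handled here (an assumed sketch, not an exhaustive list):
        #   Trino / Oracle:  LISTAGG(x, ',') WITHIN GROUP (ORDER BY y)
        #   Postgres:        STRING_AGG(x, ',' ORDER BY y)
        # Both are canonicalized into an exp.GroupConcat whose `this` carries the ORDER BY, which is
        # what makes transpilation to MySQL / SQLite style GROUP_CONCAT easier, per the note above.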
6642 if not self._match_text_seq("WITHIN", "GROUP"): 6643 self._retreat(index) 6644 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6645 6646 # The corresponding match_r_paren will be called in parse_function (caller) 6647 self._match_l_paren() 6648 6649 return self.expression( 6650 exp.GroupConcat, 6651 this=self._parse_order(this=seq_get(args, 0)), 6652 separator=seq_get(args, 1), 6653 on_overflow=on_overflow, 6654 ) 6655 6656 def _parse_convert( 6657 self, strict: bool, safe: t.Optional[bool] = None 6658 ) -> t.Optional[exp.Expression]: 6659 this = self._parse_bitwise() 6660 6661 if self._match(TokenType.USING): 6662 to: t.Optional[exp.Expression] = self.expression( 6663 exp.CharacterSet, this=self._parse_var() 6664 ) 6665 elif self._match(TokenType.COMMA): 6666 to = self._parse_types() 6667 else: 6668 to = None 6669 6670 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6671 6672 def _parse_xml_table(self) -> exp.XMLTable: 6673 namespaces = None 6674 passing = None 6675 columns = None 6676 6677 if self._match_text_seq("XMLNAMESPACES", "("): 6678 namespaces = self._parse_xml_namespace() 6679 self._match_text_seq(")", ",") 6680 6681 this = self._parse_string() 6682 6683 if self._match_text_seq("PASSING"): 6684 # The BY VALUE keywords are optional and are provided for semantic clarity 6685 self._match_text_seq("BY", "VALUE") 6686 passing = self._parse_csv(self._parse_column) 6687 6688 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6689 6690 if self._match_text_seq("COLUMNS"): 6691 columns = self._parse_csv(self._parse_field_def) 6692 6693 return self.expression( 6694 exp.XMLTable, 6695 this=this, 6696 namespaces=namespaces, 6697 passing=passing, 6698 columns=columns, 6699 by_ref=by_ref, 6700 ) 6701 6702 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6703 namespaces = [] 6704 6705 while True: 6706 if self._match(TokenType.DEFAULT): 6707 uri = self._parse_string() 6708 else: 6709 uri = self._parse_alias(self._parse_string()) 6710 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6711 if not self._match(TokenType.COMMA): 6712 break 6713 6714 return namespaces 6715 6716 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6717 args = self._parse_csv(self._parse_assignment) 6718 6719 if len(args) < 3: 6720 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6721 6722 return self.expression(exp.DecodeCase, expressions=args) 6723 6724 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6725 self._match_text_seq("KEY") 6726 key = self._parse_column() 6727 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6728 self._match_text_seq("VALUE") 6729 value = self._parse_bitwise() 6730 6731 if not key and not value: 6732 return None 6733 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6734 6735 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6736 if not this or not self._match_text_seq("FORMAT", "JSON"): 6737 return this 6738 6739 return self.expression(exp.FormatJson, this=this) 6740 6741 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6742 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6743 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6744 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6745 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6746 else: 6747 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6748 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6749 6750 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6751 6752 if not empty and not error and not null: 6753 return None 6754 6755 return self.expression( 6756 exp.OnCondition, 6757 empty=empty, 6758 error=error, 6759 null=null, 6760 ) 6761 6762 def _parse_on_handling( 6763 self, on: str, *values: str 6764 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6765 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6766 for value in values: 6767 if self._match_text_seq(value, "ON", on): 6768 return f"{value} ON {on}" 6769 6770 index = self._index 6771 if self._match(TokenType.DEFAULT): 6772 default_value = self._parse_bitwise() 6773 if self._match_text_seq("ON", on): 6774 return default_value 6775 6776 self._retreat(index) 6777 6778 return None 6779 6780 @t.overload 6781 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6782 6783 @t.overload 6784 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6785 6786 def _parse_json_object(self, agg=False): 6787 star = self._parse_star() 6788 expressions = ( 6789 [star] 6790 if star 6791 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6792 ) 6793 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6794 6795 unique_keys = None 6796 if self._match_text_seq("WITH", "UNIQUE"): 6797 unique_keys = True 6798 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6799 unique_keys = False 6800 6801 self._match_text_seq("KEYS") 6802 6803 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6804 self._parse_type() 6805 ) 6806 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6807 6808 return self.expression( 6809 exp.JSONObjectAgg if agg else exp.JSONObject, 6810 expressions=expressions, 6811 null_handling=null_handling, 6812 unique_keys=unique_keys, 6813 return_type=return_type, 6814 encoding=encoding, 6815 ) 6816 6817 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6818 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6819 if not self._match_text_seq("NESTED"): 6820 this = self._parse_id_var() 6821 kind = self._parse_types(allow_identifiers=False) 6822 nested = None 6823 else: 6824 this = None 6825 kind = None 6826 nested = True 6827 6828 path = self._match_text_seq("PATH") and self._parse_string() 6829 nested_schema = nested and self._parse_json_schema() 6830 6831 return self.expression( 6832 exp.JSONColumnDef, 6833 this=this, 6834 kind=kind, 6835 path=path, 6836 nested_schema=nested_schema, 6837 ) 6838 6839 def _parse_json_schema(self) -> exp.JSONSchema: 6840 self._match_text_seq("COLUMNS") 6841 return self.expression( 6842 exp.JSONSchema, 6843 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6844 ) 6845 6846 def _parse_json_table(self) -> exp.JSONTable: 6847 this = self._parse_format_json(self._parse_bitwise()) 6848 path = self._match(TokenType.COMMA) and self._parse_string() 6849 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6850 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6851 schema = 
self._parse_json_schema() 6852 6853 return exp.JSONTable( 6854 this=this, 6855 schema=schema, 6856 path=path, 6857 error_handling=error_handling, 6858 empty_handling=empty_handling, 6859 ) 6860 6861 def _parse_match_against(self) -> exp.MatchAgainst: 6862 expressions = self._parse_csv(self._parse_column) 6863 6864 self._match_text_seq(")", "AGAINST", "(") 6865 6866 this = self._parse_string() 6867 6868 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6869 modifier = "IN NATURAL LANGUAGE MODE" 6870 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6871 modifier = f"{modifier} WITH QUERY EXPANSION" 6872 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6873 modifier = "IN BOOLEAN MODE" 6874 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6875 modifier = "WITH QUERY EXPANSION" 6876 else: 6877 modifier = None 6878 6879 return self.expression( 6880 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6881 ) 6882 6883 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6884 def _parse_open_json(self) -> exp.OpenJSON: 6885 this = self._parse_bitwise() 6886 path = self._match(TokenType.COMMA) and self._parse_string() 6887 6888 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6889 this = self._parse_field(any_token=True) 6890 kind = self._parse_types() 6891 path = self._parse_string() 6892 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6893 6894 return self.expression( 6895 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6896 ) 6897 6898 expressions = None 6899 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6900 self._match_l_paren() 6901 expressions = self._parse_csv(_parse_open_json_column_def) 6902 6903 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6904 6905 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6906 args = self._parse_csv(self._parse_bitwise) 6907 6908 if self._match(TokenType.IN): 6909 return self.expression( 6910 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6911 ) 6912 6913 if haystack_first: 6914 haystack = seq_get(args, 0) 6915 needle = seq_get(args, 1) 6916 else: 6917 haystack = seq_get(args, 1) 6918 needle = seq_get(args, 0) 6919 6920 return self.expression( 6921 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6922 ) 6923 6924 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6925 args = self._parse_csv(self._parse_table) 6926 return exp.JoinHint(this=func_name.upper(), expressions=args) 6927 6928 def _parse_substring(self) -> exp.Substring: 6929 # Postgres supports the form: substring(string [from int] [for int]) 6930 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6931 6932 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6933 6934 if self._match(TokenType.FROM): 6935 args.append(self._parse_bitwise()) 6936 if self._match(TokenType.FOR): 6937 if len(args) == 1: 6938 args.append(exp.Literal.number(1)) 6939 args.append(self._parse_bitwise()) 6940 6941 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6942 6943 def _parse_trim(self) -> exp.Trim: 6944 # https://www.w3resource.com/sql/character-functions/trim.php 6945 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6946 6947 position = None 6948 collation = None 6949 expression = None 6950 6951 if self._match_texts(self.TRIM_TYPES): 6952 position = 
self._prev.text.upper() 6953 6954 this = self._parse_bitwise() 6955 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6956 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6957 expression = self._parse_bitwise() 6958 6959 if invert_order: 6960 this, expression = expression, this 6961 6962 if self._match(TokenType.COLLATE): 6963 collation = self._parse_bitwise() 6964 6965 return self.expression( 6966 exp.Trim, this=this, position=position, expression=expression, collation=collation 6967 ) 6968 6969 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6970 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6971 6972 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6973 return self._parse_window(self._parse_id_var(), alias=True) 6974 6975 def _parse_respect_or_ignore_nulls( 6976 self, this: t.Optional[exp.Expression] 6977 ) -> t.Optional[exp.Expression]: 6978 if self._match_text_seq("IGNORE", "NULLS"): 6979 return self.expression(exp.IgnoreNulls, this=this) 6980 if self._match_text_seq("RESPECT", "NULLS"): 6981 return self.expression(exp.RespectNulls, this=this) 6982 return this 6983 6984 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6985 if self._match(TokenType.HAVING): 6986 self._match_texts(("MAX", "MIN")) 6987 max = self._prev.text.upper() != "MIN" 6988 return self.expression( 6989 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6990 ) 6991 6992 return this 6993 6994 def _parse_window( 6995 self, this: t.Optional[exp.Expression], alias: bool = False 6996 ) -> t.Optional[exp.Expression]: 6997 func = this 6998 comments = func.comments if isinstance(func, exp.Expression) else None 6999 7000 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7001 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7002 if self._match_text_seq("WITHIN", "GROUP"): 7003 order = self._parse_wrapped(self._parse_order) 7004 this = self.expression(exp.WithinGroup, this=this, expression=order) 7005 7006 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7007 self._match(TokenType.WHERE) 7008 this = self.expression( 7009 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7010 ) 7011 self._match_r_paren() 7012 7013 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7014 # Some dialects choose to implement and some do not. 7015 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7016 7017 # There is some code above in _parse_lambda that handles 7018 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7019 7020 # The below changes handle 7021 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 7022 7023 # Oracle allows both formats 7024 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7025 # and Snowflake chose to do the same for familiarity 7026 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7027 if isinstance(this, exp.AggFunc): 7028 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7029 7030 if ignore_respect and ignore_respect is not this: 7031 ignore_respect.replace(ignore_respect.this) 7032 this = self.expression(ignore_respect.__class__, this=this) 7033 7034 this = self._parse_respect_or_ignore_nulls(this) 7035 7036 # bigquery select from window x AS (partition by ...) 
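# A minimal usage sketch (illustrative, not part of parser.py; assumes sqlglot is importable):
# both placements of IGNORE NULLS described in the comments above are normalized so that
# exp.IgnoreNulls ends up wrapping the aggregate before the OVER clause is attached.
import sqlglot
from sqlglot import exp

for sql in (
    "SELECT FIRST_VALUE(t.x IGNORE NULLS) OVER (ORDER BY t.y) FROM t",
    "SELECT FIRST_VALUE(t.x) IGNORE NULLS OVER (ORDER BY t.y) FROM t",
):
    window = sqlglot.parse_one(sql).find(exp.Window)
    assert window is not None and isinstance(window.this, exp.IgnoreNulls)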
7037 if alias: 7038 over = None 7039 self._match(TokenType.ALIAS) 7040 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7041 return this 7042 else: 7043 over = self._prev.text.upper() 7044 7045 if comments and isinstance(func, exp.Expression): 7046 func.pop_comments() 7047 7048 if not self._match(TokenType.L_PAREN): 7049 return self.expression( 7050 exp.Window, 7051 comments=comments, 7052 this=this, 7053 alias=self._parse_id_var(False), 7054 over=over, 7055 ) 7056 7057 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7058 7059 first = self._match(TokenType.FIRST) 7060 if self._match_text_seq("LAST"): 7061 first = False 7062 7063 partition, order = self._parse_partition_and_order() 7064 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7065 7066 if kind: 7067 self._match(TokenType.BETWEEN) 7068 start = self._parse_window_spec() 7069 self._match(TokenType.AND) 7070 end = self._parse_window_spec() 7071 exclude = ( 7072 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7073 if self._match_text_seq("EXCLUDE") 7074 else None 7075 ) 7076 7077 spec = self.expression( 7078 exp.WindowSpec, 7079 kind=kind, 7080 start=start["value"], 7081 start_side=start["side"], 7082 end=end["value"], 7083 end_side=end["side"], 7084 exclude=exclude, 7085 ) 7086 else: 7087 spec = None 7088 7089 self._match_r_paren() 7090 7091 window = self.expression( 7092 exp.Window, 7093 comments=comments, 7094 this=this, 7095 partition_by=partition, 7096 order=order, 7097 spec=spec, 7098 alias=window_alias, 7099 over=over, 7100 first=first, 7101 ) 7102 7103 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 7104 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7105 return self._parse_window(window, alias=alias) 7106 7107 return window 7108 7109 def _parse_partition_and_order( 7110 self, 7111 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7112 return self._parse_partition_by(), self._parse_order() 7113 7114 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7115 self._match(TokenType.BETWEEN) 7116 7117 return { 7118 "value": ( 7119 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7120 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7121 or self._parse_bitwise() 7122 ), 7123 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7124 } 7125 7126 def _parse_alias( 7127 self, this: t.Optional[exp.Expression], explicit: bool = False 7128 ) -> t.Optional[exp.Expression]: 7129 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7130 # so this section tries to parse the clause version and if it fails, it treats the token 7131 # as an identifier (alias) 7132 if self._can_parse_limit_or_offset(): 7133 return this 7134 7135 any_token = self._match(TokenType.ALIAS) 7136 comments = self._prev_comments or [] 7137 7138 if explicit and not any_token: 7139 return this 7140 7141 if self._match(TokenType.L_PAREN): 7142 aliases = self.expression( 7143 exp.Aliases, 7144 comments=comments, 7145 this=this, 7146 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7147 ) 7148 self._match_r_paren(aliases) 7149 return aliases 7150 7151 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7152 self.STRING_ALIASES and self._parse_string_as_identifier() 7153 ) 7154 7155 if alias: 7156 comments.extend(alias.pop_comments()) 7157 this = self.expression(exp.Alias, comments=comments, this=this, 
alias=alias) 7158 column = this.this 7159 7160 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7161 if not this.comments and column and column.comments: 7162 this.comments = column.pop_comments() 7163 7164 return this 7165 7166 def _parse_id_var( 7167 self, 7168 any_token: bool = True, 7169 tokens: t.Optional[t.Collection[TokenType]] = None, 7170 ) -> t.Optional[exp.Expression]: 7171 expression = self._parse_identifier() 7172 if not expression and ( 7173 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7174 ): 7175 quoted = self._prev.token_type == TokenType.STRING 7176 expression = self._identifier_expression(quoted=quoted) 7177 7178 return expression 7179 7180 def _parse_string(self) -> t.Optional[exp.Expression]: 7181 if self._match_set(self.STRING_PARSERS): 7182 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7183 return self._parse_placeholder() 7184 7185 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7186 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7187 if output: 7188 output.update_positions(self._prev) 7189 return output 7190 7191 def _parse_number(self) -> t.Optional[exp.Expression]: 7192 if self._match_set(self.NUMERIC_PARSERS): 7193 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7194 return self._parse_placeholder() 7195 7196 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7197 if self._match(TokenType.IDENTIFIER): 7198 return self._identifier_expression(quoted=True) 7199 return self._parse_placeholder() 7200 7201 def _parse_var( 7202 self, 7203 any_token: bool = False, 7204 tokens: t.Optional[t.Collection[TokenType]] = None, 7205 upper: bool = False, 7206 ) -> t.Optional[exp.Expression]: 7207 if ( 7208 (any_token and self._advance_any()) 7209 or self._match(TokenType.VAR) 7210 or (self._match_set(tokens) if tokens else False) 7211 ): 7212 return self.expression( 7213 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7214 ) 7215 return self._parse_placeholder() 7216 7217 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7218 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7219 self._advance() 7220 return self._prev 7221 return None 7222 7223 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7224 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7225 7226 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7227 return self._parse_primary() or self._parse_var(any_token=True) 7228 7229 def _parse_null(self) -> t.Optional[exp.Expression]: 7230 if self._match_set(self.NULL_TOKENS): 7231 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7232 return self._parse_placeholder() 7233 7234 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7235 if self._match(TokenType.TRUE): 7236 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7237 if self._match(TokenType.FALSE): 7238 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7239 return self._parse_placeholder() 7240 7241 def _parse_star(self) -> t.Optional[exp.Expression]: 7242 if self._match(TokenType.STAR): 7243 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7244 return self._parse_placeholder() 7245 7246 def _parse_parameter(self) -> exp.Parameter: 7247 this = self._parse_identifier() or self._parse_primary_or_var() 7248 return 
self.expression(exp.Parameter, this=this) 7249 7250 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7251 if self._match_set(self.PLACEHOLDER_PARSERS): 7252 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7253 if placeholder: 7254 return placeholder 7255 self._advance(-1) 7256 return None 7257 7258 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7259 if not self._match_texts(keywords): 7260 return None 7261 if self._match(TokenType.L_PAREN, advance=False): 7262 return self._parse_wrapped_csv(self._parse_expression) 7263 7264 expression = self._parse_expression() 7265 return [expression] if expression else None 7266 7267 def _parse_csv( 7268 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7269 ) -> t.List[exp.Expression]: 7270 parse_result = parse_method() 7271 items = [parse_result] if parse_result is not None else [] 7272 7273 while self._match(sep): 7274 self._add_comments(parse_result) 7275 parse_result = parse_method() 7276 if parse_result is not None: 7277 items.append(parse_result) 7278 7279 return items 7280 7281 def _parse_tokens( 7282 self, parse_method: t.Callable, expressions: t.Dict 7283 ) -> t.Optional[exp.Expression]: 7284 this = parse_method() 7285 7286 while self._match_set(expressions): 7287 this = self.expression( 7288 expressions[self._prev.token_type], 7289 this=this, 7290 comments=self._prev_comments, 7291 expression=parse_method(), 7292 ) 7293 7294 return this 7295 7296 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7297 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7298 7299 def _parse_wrapped_csv( 7300 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7301 ) -> t.List[exp.Expression]: 7302 return self._parse_wrapped( 7303 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7304 ) 7305 7306 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7307 wrapped = self._match(TokenType.L_PAREN) 7308 if not wrapped and not optional: 7309 self.raise_error("Expecting (") 7310 parse_result = parse_method() 7311 if wrapped: 7312 self._match_r_paren() 7313 return parse_result 7314 7315 def _parse_expressions(self) -> t.List[exp.Expression]: 7316 return self._parse_csv(self._parse_expression) 7317 7318 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7319 return self._parse_select() or self._parse_set_operations( 7320 self._parse_alias(self._parse_assignment(), explicit=True) 7321 if alias 7322 else self._parse_assignment() 7323 ) 7324 7325 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7326 return self._parse_query_modifiers( 7327 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7328 ) 7329 7330 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7331 this = None 7332 if self._match_texts(self.TRANSACTION_KIND): 7333 this = self._prev.text 7334 7335 self._match_texts(("TRANSACTION", "WORK")) 7336 7337 modes = [] 7338 while True: 7339 mode = [] 7340 while self._match(TokenType.VAR): 7341 mode.append(self._prev.text) 7342 7343 if mode: 7344 modes.append(" ".join(mode)) 7345 if not self._match(TokenType.COMMA): 7346 break 7347 7348 return self.expression(exp.Transaction, this=this, modes=modes) 7349 7350 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7351 chain = None 7352 savepoint = None 7353 is_rollback = self._prev.token_type == 
TokenType.ROLLBACK 7354 7355 self._match_texts(("TRANSACTION", "WORK")) 7356 7357 if self._match_text_seq("TO"): 7358 self._match_text_seq("SAVEPOINT") 7359 savepoint = self._parse_id_var() 7360 7361 if self._match(TokenType.AND): 7362 chain = not self._match_text_seq("NO") 7363 self._match_text_seq("CHAIN") 7364 7365 if is_rollback: 7366 return self.expression(exp.Rollback, savepoint=savepoint) 7367 7368 return self.expression(exp.Commit, chain=chain) 7369 7370 def _parse_refresh(self) -> exp.Refresh: 7371 self._match(TokenType.TABLE) 7372 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7373 7374 def _parse_column_def_with_exists(self): 7375 start = self._index 7376 self._match(TokenType.COLUMN) 7377 7378 exists_column = self._parse_exists(not_=True) 7379 expression = self._parse_field_def() 7380 7381 if not isinstance(expression, exp.ColumnDef): 7382 self._retreat(start) 7383 return None 7384 7385 expression.set("exists", exists_column) 7386 7387 return expression 7388 7389 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7390 if not self._prev.text.upper() == "ADD": 7391 return None 7392 7393 expression = self._parse_column_def_with_exists() 7394 if not expression: 7395 return None 7396 7397 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7398 if self._match_texts(("FIRST", "AFTER")): 7399 position = self._prev.text 7400 column_position = self.expression( 7401 exp.ColumnPosition, this=self._parse_column(), position=position 7402 ) 7403 expression.set("position", column_position) 7404 7405 return expression 7406 7407 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7408 drop = self._match(TokenType.DROP) and self._parse_drop() 7409 if drop and not isinstance(drop, exp.Command): 7410 drop.set("kind", drop.args.get("kind", "COLUMN")) 7411 return drop 7412 7413 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7414 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7415 return self.expression( 7416 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7417 ) 7418 7419 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7420 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7421 self._match_text_seq("ADD") 7422 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7423 return self.expression( 7424 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7425 ) 7426 7427 column_def = self._parse_add_column() 7428 if isinstance(column_def, exp.ColumnDef): 7429 return column_def 7430 7431 exists = self._parse_exists(not_=True) 7432 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7433 return self.expression( 7434 exp.AddPartition, 7435 exists=exists, 7436 this=self._parse_field(any_token=True), 7437 location=self._match_text_seq("LOCATION", advance=False) 7438 and self._parse_property(), 7439 ) 7440 7441 return None 7442 7443 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7444 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7445 or self._match_text_seq("COLUMNS") 7446 ): 7447 schema = self._parse_schema() 7448 7449 return ( 7450 ensure_list(schema) 7451 if schema 7452 else self._parse_csv(self._parse_column_def_with_exists) 7453 ) 7454 7455 return self._parse_csv(_parse_add_alteration) 7456 7457 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7458 if 
self._match_texts(self.ALTER_ALTER_PARSERS): 7459 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7460 7461 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7462 # keyword after ALTER we default to parsing this statement 7463 self._match(TokenType.COLUMN) 7464 column = self._parse_field(any_token=True) 7465 7466 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7467 return self.expression(exp.AlterColumn, this=column, drop=True) 7468 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7469 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7470 if self._match(TokenType.COMMENT): 7471 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7472 if self._match_text_seq("DROP", "NOT", "NULL"): 7473 return self.expression( 7474 exp.AlterColumn, 7475 this=column, 7476 drop=True, 7477 allow_null=True, 7478 ) 7479 if self._match_text_seq("SET", "NOT", "NULL"): 7480 return self.expression( 7481 exp.AlterColumn, 7482 this=column, 7483 allow_null=False, 7484 ) 7485 7486 if self._match_text_seq("SET", "VISIBLE"): 7487 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7488 if self._match_text_seq("SET", "INVISIBLE"): 7489 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7490 7491 self._match_text_seq("SET", "DATA") 7492 self._match_text_seq("TYPE") 7493 return self.expression( 7494 exp.AlterColumn, 7495 this=column, 7496 dtype=self._parse_types(), 7497 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7498 using=self._match(TokenType.USING) and self._parse_assignment(), 7499 ) 7500 7501 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7502 if self._match_texts(("ALL", "EVEN", "AUTO")): 7503 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7504 7505 self._match_text_seq("KEY", "DISTKEY") 7506 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7507 7508 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7509 if compound: 7510 self._match_text_seq("SORTKEY") 7511 7512 if self._match(TokenType.L_PAREN, advance=False): 7513 return self.expression( 7514 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7515 ) 7516 7517 self._match_texts(("AUTO", "NONE")) 7518 return self.expression( 7519 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7520 ) 7521 7522 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7523 index = self._index - 1 7524 7525 partition_exists = self._parse_exists() 7526 if self._match(TokenType.PARTITION, advance=False): 7527 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7528 7529 self._retreat(index) 7530 return self._parse_csv(self._parse_drop_column) 7531 7532 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7533 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7534 exists = self._parse_exists() 7535 old_column = self._parse_column() 7536 to = self._match_text_seq("TO") 7537 new_column = self._parse_column() 7538 7539 if old_column is None or to is None or new_column is None: 7540 return None 7541 7542 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7543 7544 self._match_text_seq("TO") 7545 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7546 7547 def _parse_alter_table_set(self) -> exp.AlterSet: 7548 
alter_set = self.expression(exp.AlterSet) 7549 7550 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7551 "TABLE", "PROPERTIES" 7552 ): 7553 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7554 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7555 alter_set.set("expressions", [self._parse_assignment()]) 7556 elif self._match_texts(("LOGGED", "UNLOGGED")): 7557 alter_set.set("option", exp.var(self._prev.text.upper())) 7558 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7559 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7560 elif self._match_text_seq("LOCATION"): 7561 alter_set.set("location", self._parse_field()) 7562 elif self._match_text_seq("ACCESS", "METHOD"): 7563 alter_set.set("access_method", self._parse_field()) 7564 elif self._match_text_seq("TABLESPACE"): 7565 alter_set.set("tablespace", self._parse_field()) 7566 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7567 alter_set.set("file_format", [self._parse_field()]) 7568 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7569 alter_set.set("file_format", self._parse_wrapped_options()) 7570 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7571 alter_set.set("copy_options", self._parse_wrapped_options()) 7572 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7573 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7574 else: 7575 if self._match_text_seq("SERDE"): 7576 alter_set.set("serde", self._parse_field()) 7577 7578 properties = self._parse_wrapped(self._parse_properties, optional=True) 7579 alter_set.set("expressions", [properties]) 7580 7581 return alter_set 7582 7583 def _parse_alter(self) -> exp.Alter | exp.Command: 7584 start = self._prev 7585 7586 alter_token = self._match_set(self.ALTERABLES) and self._prev 7587 if not alter_token: 7588 return self._parse_as_command(start) 7589 7590 exists = self._parse_exists() 7591 only = self._match_text_seq("ONLY") 7592 this = self._parse_table(schema=True) 7593 check = self._match_text_seq("WITH", "CHECK") 7594 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7595 7596 if self._next: 7597 self._advance() 7598 7599 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7600 if parser: 7601 actions = ensure_list(parser(self)) 7602 not_valid = self._match_text_seq("NOT", "VALID") 7603 options = self._parse_csv(self._parse_property) 7604 7605 if not self._curr and actions: 7606 return self.expression( 7607 exp.Alter, 7608 this=this, 7609 kind=alter_token.text.upper(), 7610 exists=exists, 7611 actions=actions, 7612 only=only, 7613 options=options, 7614 cluster=cluster, 7615 not_valid=not_valid, 7616 check=check, 7617 ) 7618 7619 return self._parse_as_command(start) 7620 7621 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7622 start = self._prev 7623 # https://duckdb.org/docs/sql/statements/analyze 7624 if not self._curr: 7625 return self.expression(exp.Analyze) 7626 7627 options = [] 7628 while self._match_texts(self.ANALYZE_STYLES): 7629 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7630 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7631 else: 7632 options.append(self._prev.text.upper()) 7633 7634 this: t.Optional[exp.Expression] = None 7635 inner_expression: t.Optional[exp.Expression] = None 7636 7637 kind = self._curr and self._curr.text.upper() 7638 7639 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7640 
this = self._parse_table_parts() 7641 elif self._match_text_seq("TABLES"): 7642 if self._match_set((TokenType.FROM, TokenType.IN)): 7643 kind = f"{kind} {self._prev.text.upper()}" 7644 this = self._parse_table(schema=True, is_db_reference=True) 7645 elif self._match_text_seq("DATABASE"): 7646 this = self._parse_table(schema=True, is_db_reference=True) 7647 elif self._match_text_seq("CLUSTER"): 7648 this = self._parse_table() 7649 # Try matching inner expr keywords before fallback to parse table. 7650 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7651 kind = None 7652 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7653 else: 7654 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7655 kind = None 7656 this = self._parse_table_parts() 7657 7658 partition = self._try_parse(self._parse_partition) 7659 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7660 return self._parse_as_command(start) 7661 7662 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7663 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7664 "WITH", "ASYNC", "MODE" 7665 ): 7666 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7667 else: 7668 mode = None 7669 7670 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7671 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7672 7673 properties = self._parse_properties() 7674 return self.expression( 7675 exp.Analyze, 7676 kind=kind, 7677 this=this, 7678 mode=mode, 7679 partition=partition, 7680 properties=properties, 7681 expression=inner_expression, 7682 options=options, 7683 ) 7684 7685 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7686 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7687 this = None 7688 kind = self._prev.text.upper() 7689 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7690 expressions = [] 7691 7692 if not self._match_text_seq("STATISTICS"): 7693 self.raise_error("Expecting token STATISTICS") 7694 7695 if self._match_text_seq("NOSCAN"): 7696 this = "NOSCAN" 7697 elif self._match(TokenType.FOR): 7698 if self._match_text_seq("ALL", "COLUMNS"): 7699 this = "FOR ALL COLUMNS" 7700 if self._match_texts("COLUMNS"): 7701 this = "FOR COLUMNS" 7702 expressions = self._parse_csv(self._parse_column_reference) 7703 elif self._match_text_seq("SAMPLE"): 7704 sample = self._parse_number() 7705 expressions = [ 7706 self.expression( 7707 exp.AnalyzeSample, 7708 sample=sample, 7709 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7710 ) 7711 ] 7712 7713 return self.expression( 7714 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7715 ) 7716 7717 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7718 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7719 kind = None 7720 this = None 7721 expression: t.Optional[exp.Expression] = None 7722 if self._match_text_seq("REF", "UPDATE"): 7723 kind = "REF" 7724 this = "UPDATE" 7725 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7726 this = "UPDATE SET DANGLING TO NULL" 7727 elif self._match_text_seq("STRUCTURE"): 7728 kind = "STRUCTURE" 7729 if self._match_text_seq("CASCADE", "FAST"): 7730 this = "CASCADE FAST" 7731 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7732 ("ONLINE", "OFFLINE") 7733 ): 7734 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 
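# Illustrative sketch (not part of this module; refers to the ALTER machinery above):
# an ALTER TABLE statement that parses cleanly becomes an exp.Alter node whose actions
# hold the individual alterations, e.g. an exp.ColumnDef for ADD COLUMN and an
# exp.RenameColumn for RENAME COLUMN; anything unparseable falls back to exp.Command.
import sqlglot
from sqlglot import exp

add = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT")
assert isinstance(add, exp.Alter) and add.find(exp.ColumnDef) is not None

rename = sqlglot.parse_one("ALTER TABLE t RENAME COLUMN a TO b")
assert isinstance(rename, exp.Alter) and rename.find(exp.RenameColumn) is not None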
7735 expression = self._parse_into() 7736 7737 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7738 7739 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7740 this = self._prev.text.upper() 7741 if self._match_text_seq("COLUMNS"): 7742 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7743 return None 7744 7745 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7746 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7747 if self._match_text_seq("STATISTICS"): 7748 return self.expression(exp.AnalyzeDelete, kind=kind) 7749 return None 7750 7751 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7752 if self._match_text_seq("CHAINED", "ROWS"): 7753 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7754 return None 7755 7756 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7757 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7758 this = self._prev.text.upper() 7759 expression: t.Optional[exp.Expression] = None 7760 expressions = [] 7761 update_options = None 7762 7763 if self._match_text_seq("HISTOGRAM", "ON"): 7764 expressions = self._parse_csv(self._parse_column_reference) 7765 with_expressions = [] 7766 while self._match(TokenType.WITH): 7767 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7768 if self._match_texts(("SYNC", "ASYNC")): 7769 if self._match_text_seq("MODE", advance=False): 7770 with_expressions.append(f"{self._prev.text.upper()} MODE") 7771 self._advance() 7772 else: 7773 buckets = self._parse_number() 7774 if self._match_text_seq("BUCKETS"): 7775 with_expressions.append(f"{buckets} BUCKETS") 7776 if with_expressions: 7777 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7778 7779 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7780 TokenType.UPDATE, advance=False 7781 ): 7782 update_options = self._prev.text.upper() 7783 self._advance() 7784 elif self._match_text_seq("USING", "DATA"): 7785 expression = self.expression(exp.UsingData, this=self._parse_string()) 7786 7787 return self.expression( 7788 exp.AnalyzeHistogram, 7789 this=this, 7790 expressions=expressions, 7791 expression=expression, 7792 update_options=update_options, 7793 ) 7794 7795 def _parse_merge(self) -> exp.Merge: 7796 self._match(TokenType.INTO) 7797 target = self._parse_table() 7798 7799 if target and self._match(TokenType.ALIAS, advance=False): 7800 target.set("alias", self._parse_table_alias()) 7801 7802 self._match(TokenType.USING) 7803 using = self._parse_table() 7804 7805 self._match(TokenType.ON) 7806 on = self._parse_assignment() 7807 7808 return self.expression( 7809 exp.Merge, 7810 this=target, 7811 using=using, 7812 on=on, 7813 whens=self._parse_when_matched(), 7814 returning=self._parse_returning(), 7815 ) 7816 7817 def _parse_when_matched(self) -> exp.Whens: 7818 whens = [] 7819 7820 while self._match(TokenType.WHEN): 7821 matched = not self._match(TokenType.NOT) 7822 self._match_text_seq("MATCHED") 7823 source = ( 7824 False 7825 if self._match_text_seq("BY", "TARGET") 7826 else self._match_text_seq("BY", "SOURCE") 7827 ) 7828 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7829 7830 self._match(TokenType.THEN) 7831 7832 if self._match(TokenType.INSERT): 7833 this = self._parse_star() 7834 if this: 7835 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7836 
else: 7837 then = self.expression( 7838 exp.Insert, 7839 this=exp.var("ROW") 7840 if self._match_text_seq("ROW") 7841 else self._parse_value(values=False), 7842 expression=self._match_text_seq("VALUES") and self._parse_value(), 7843 ) 7844 elif self._match(TokenType.UPDATE): 7845 expressions = self._parse_star() 7846 if expressions: 7847 then = self.expression(exp.Update, expressions=expressions) 7848 else: 7849 then = self.expression( 7850 exp.Update, 7851 expressions=self._match(TokenType.SET) 7852 and self._parse_csv(self._parse_equality), 7853 ) 7854 elif self._match(TokenType.DELETE): 7855 then = self.expression(exp.Var, this=self._prev.text) 7856 else: 7857 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7858 7859 whens.append( 7860 self.expression( 7861 exp.When, 7862 matched=matched, 7863 source=source, 7864 condition=condition, 7865 then=then, 7866 ) 7867 ) 7868 return self.expression(exp.Whens, expressions=whens) 7869 7870 def _parse_show(self) -> t.Optional[exp.Expression]: 7871 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7872 if parser: 7873 return parser(self) 7874 return self._parse_as_command(self._prev) 7875 7876 def _parse_set_item_assignment( 7877 self, kind: t.Optional[str] = None 7878 ) -> t.Optional[exp.Expression]: 7879 index = self._index 7880 7881 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7882 return self._parse_set_transaction(global_=kind == "GLOBAL") 7883 7884 left = self._parse_primary() or self._parse_column() 7885 assignment_delimiter = self._match_texts(("=", "TO")) 7886 7887 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7888 self._retreat(index) 7889 return None 7890 7891 right = self._parse_statement() or self._parse_id_var() 7892 if isinstance(right, (exp.Column, exp.Identifier)): 7893 right = exp.var(right.name) 7894 7895 this = self.expression(exp.EQ, this=left, expression=right) 7896 return self.expression(exp.SetItem, this=this, kind=kind) 7897 7898 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7899 self._match_text_seq("TRANSACTION") 7900 characteristics = self._parse_csv( 7901 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7902 ) 7903 return self.expression( 7904 exp.SetItem, 7905 expressions=characteristics, 7906 kind="TRANSACTION", 7907 **{"global": global_}, # type: ignore 7908 ) 7909 7910 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7911 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7912 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7913 7914 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7915 index = self._index 7916 set_ = self.expression( 7917 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7918 ) 7919 7920 if self._curr: 7921 self._retreat(index) 7922 return self._parse_as_command(self._prev) 7923 7924 return set_ 7925 7926 def _parse_var_from_options( 7927 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7928 ) -> t.Optional[exp.Var]: 7929 start = self._curr 7930 if not start: 7931 return None 7932 7933 option = start.text.upper() 7934 continuations = options.get(option) 7935 7936 index = self._index 7937 self._advance() 7938 for keywords in continuations or []: 7939 if isinstance(keywords, str): 7940 keywords = (keywords,) 7941 7942 if self._match_text_seq(*keywords): 7943 option = f"{option} {' '.join(keywords)}" 7944 break 7945 else: 7946 if 
continuations or continuations is None: 7947 if raise_unmatched: 7948 self.raise_error(f"Unknown option {option}") 7949 7950 self._retreat(index) 7951 return None 7952 7953 return exp.var(option) 7954 7955 def _parse_as_command(self, start: Token) -> exp.Command: 7956 while self._curr: 7957 self._advance() 7958 text = self._find_sql(start, self._prev) 7959 size = len(start.text) 7960 self._warn_unsupported() 7961 return exp.Command(this=text[:size], expression=text[size:]) 7962 7963 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7964 settings = [] 7965 7966 self._match_l_paren() 7967 kind = self._parse_id_var() 7968 7969 if self._match(TokenType.L_PAREN): 7970 while True: 7971 key = self._parse_id_var() 7972 value = self._parse_primary() 7973 if not key and value is None: 7974 break 7975 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7976 self._match(TokenType.R_PAREN) 7977 7978 self._match_r_paren() 7979 7980 return self.expression( 7981 exp.DictProperty, 7982 this=this, 7983 kind=kind.this if kind else None, 7984 settings=settings, 7985 ) 7986 7987 def _parse_dict_range(self, this: str) -> exp.DictRange: 7988 self._match_l_paren() 7989 has_min = self._match_text_seq("MIN") 7990 if has_min: 7991 min = self._parse_var() or self._parse_primary() 7992 self._match_text_seq("MAX") 7993 max = self._parse_var() or self._parse_primary() 7994 else: 7995 max = self._parse_var() or self._parse_primary() 7996 min = exp.Literal.number(0) 7997 self._match_r_paren() 7998 return self.expression(exp.DictRange, this=this, min=min, max=max) 7999 8000 def _parse_comprehension( 8001 self, this: t.Optional[exp.Expression] 8002 ) -> t.Optional[exp.Comprehension]: 8003 index = self._index 8004 expression = self._parse_column() 8005 if not self._match(TokenType.IN): 8006 self._retreat(index - 1) 8007 return None 8008 iterator = self._parse_column() 8009 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8010 return self.expression( 8011 exp.Comprehension, 8012 this=this, 8013 expression=expression, 8014 iterator=iterator, 8015 condition=condition, 8016 ) 8017 8018 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8019 if self._match(TokenType.HEREDOC_STRING): 8020 return self.expression(exp.Heredoc, this=self._prev.text) 8021 8022 if not self._match_text_seq("$"): 8023 return None 8024 8025 tags = ["$"] 8026 tag_text = None 8027 8028 if self._is_connected(): 8029 self._advance() 8030 tags.append(self._prev.text.upper()) 8031 else: 8032 self.raise_error("No closing $ found") 8033 8034 if tags[-1] != "$": 8035 if self._is_connected() and self._match_text_seq("$"): 8036 tag_text = tags[-1] 8037 tags.append("$") 8038 else: 8039 self.raise_error("No closing $ found") 8040 8041 heredoc_start = self._curr 8042 8043 while self._curr: 8044 if self._match_text_seq(*tags, advance=False): 8045 this = self._find_sql(heredoc_start, self._prev) 8046 self._advance(len(tags)) 8047 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8048 8049 self._advance() 8050 8051 self.raise_error(f"No closing {''.join(tags)} found") 8052 return None 8053 8054 def _find_parser( 8055 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8056 ) -> t.Optional[t.Callable]: 8057 if not self._curr: 8058 return None 8059 8060 index = self._index 8061 this = [] 8062 while True: 8063 # The current token might be multiple words 8064 curr = self._curr.text.upper() 8065 key = curr.split(" ") 8066 this.append(curr) 8067 8068 self._advance() 8069 result, trie = in_trie(trie, 
key) 8070 if result == TrieResult.FAILED: 8071 break 8072 8073 if result == TrieResult.EXISTS: 8074 subparser = parsers[" ".join(this)] 8075 return subparser 8076 8077 self._retreat(index) 8078 return None 8079 8080 def _match(self, token_type, advance=True, expression=None): 8081 if not self._curr: 8082 return None 8083 8084 if self._curr.token_type == token_type: 8085 if advance: 8086 self._advance() 8087 self._add_comments(expression) 8088 return True 8089 8090 return None 8091 8092 def _match_set(self, types, advance=True): 8093 if not self._curr: 8094 return None 8095 8096 if self._curr.token_type in types: 8097 if advance: 8098 self._advance() 8099 return True 8100 8101 return None 8102 8103 def _match_pair(self, token_type_a, token_type_b, advance=True): 8104 if not self._curr or not self._next: 8105 return None 8106 8107 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8108 if advance: 8109 self._advance(2) 8110 return True 8111 8112 return None 8113 8114 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8115 if not self._match(TokenType.L_PAREN, expression=expression): 8116 self.raise_error("Expecting (") 8117 8118 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8119 if not self._match(TokenType.R_PAREN, expression=expression): 8120 self.raise_error("Expecting )") 8121 8122 def _match_texts(self, texts, advance=True): 8123 if ( 8124 self._curr 8125 and self._curr.token_type != TokenType.STRING 8126 and self._curr.text.upper() in texts 8127 ): 8128 if advance: 8129 self._advance() 8130 return True 8131 return None 8132 8133 def _match_text_seq(self, *texts, advance=True): 8134 index = self._index 8135 for text in texts: 8136 if ( 8137 self._curr 8138 and self._curr.token_type != TokenType.STRING 8139 and self._curr.text.upper() == text 8140 ): 8141 self._advance() 8142 else: 8143 self._retreat(index) 8144 return None 8145 8146 if not advance: 8147 self._retreat(index) 8148 8149 return True 8150 8151 def _replace_lambda( 8152 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8153 ) -> t.Optional[exp.Expression]: 8154 if not node: 8155 return node 8156 8157 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8158 8159 for column in node.find_all(exp.Column): 8160 typ = lambda_types.get(column.parts[0].name) 8161 if typ is not None: 8162 dot_or_id = column.to_dot() if column.table else column.this 8163 8164 if typ: 8165 dot_or_id = self.expression( 8166 exp.Cast, 8167 this=dot_or_id, 8168 to=typ, 8169 ) 8170 8171 parent = column.parent 8172 8173 while isinstance(parent, exp.Dot): 8174 if not isinstance(parent.parent, exp.Dot): 8175 parent.replace(dot_or_id) 8176 break 8177 parent = parent.parent 8178 else: 8179 if column is node: 8180 node = dot_or_id 8181 else: 8182 column.replace(dot_or_id) 8183 return node 8184 8185 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8186 start = self._prev 8187 8188 # Not to be confused with TRUNCATE(number, decimals) function call 8189 if self._match(TokenType.L_PAREN): 8190 self._retreat(self._index - 2) 8191 return self._parse_function() 8192 8193 # Clickhouse supports TRUNCATE DATABASE as well 8194 is_database = self._match(TokenType.DATABASE) 8195 8196 self._match(TokenType.TABLE) 8197 8198 exists = self._parse_exists(not_=False) 8199 8200 expressions = self._parse_csv( 8201 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8202 ) 8203 8204 cluster = 
self._parse_on_property() if self._match(TokenType.ON) else None 8205 8206 if self._match_text_seq("RESTART", "IDENTITY"): 8207 identity = "RESTART" 8208 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8209 identity = "CONTINUE" 8210 else: 8211 identity = None 8212 8213 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8214 option = self._prev.text 8215 else: 8216 option = None 8217 8218 partition = self._parse_partition() 8219 8220 # Fallback case 8221 if self._curr: 8222 return self._parse_as_command(start) 8223 8224 return self.expression( 8225 exp.TruncateTable, 8226 expressions=expressions, 8227 is_database=is_database, 8228 exists=exists, 8229 cluster=cluster, 8230 identity=identity, 8231 option=option, 8232 partition=partition, 8233 ) 8234 8235 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8236 this = self._parse_ordered(self._parse_opclass) 8237 8238 if not self._match(TokenType.WITH): 8239 return this 8240 8241 op = self._parse_var(any_token=True) 8242 8243 return self.expression(exp.WithOperator, this=this, op=op) 8244 8245 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8246 self._match(TokenType.EQ) 8247 self._match(TokenType.L_PAREN) 8248 8249 opts: t.List[t.Optional[exp.Expression]] = [] 8250 option: exp.Expression | None 8251 while self._curr and not self._match(TokenType.R_PAREN): 8252 if self._match_text_seq("FORMAT_NAME", "="): 8253 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8254 option = self._parse_format_name() 8255 else: 8256 option = self._parse_property() 8257 8258 if option is None: 8259 self.raise_error("Unable to parse option") 8260 break 8261 8262 opts.append(option) 8263 8264 return opts 8265 8266 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8267 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8268 8269 options = [] 8270 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8271 option = self._parse_var(any_token=True) 8272 prev = self._prev.text.upper() 8273 8274 # Different dialects might separate options and values by white space, "=" and "AS" 8275 self._match(TokenType.EQ) 8276 self._match(TokenType.ALIAS) 8277 8278 param = self.expression(exp.CopyParameter, this=option) 8279 8280 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8281 TokenType.L_PAREN, advance=False 8282 ): 8283 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8284 param.set("expressions", self._parse_wrapped_options()) 8285 elif prev == "FILE_FORMAT": 8286 # T-SQL's external file format case 8287 param.set("expression", self._parse_field()) 8288 else: 8289 param.set("expression", self._parse_unquoted_field()) 8290 8291 options.append(param) 8292 self._match(sep) 8293 8294 return options 8295 8296 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8297 expr = self.expression(exp.Credentials) 8298 8299 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8300 expr.set("storage", self._parse_field()) 8301 if self._match_text_seq("CREDENTIALS"): 8302 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8303 creds = ( 8304 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8305 ) 8306 expr.set("credentials", creds) 8307 if self._match_text_seq("ENCRYPTION"): 8308 expr.set("encryption", self._parse_wrapped_options()) 8309 if self._match_text_seq("IAM_ROLE"): 8310 expr.set("iam_role", self._parse_field()) 8311 if self._match_text_seq("REGION"): 8312 
expr.set("region", self._parse_field()) 8313 8314 return expr 8315 8316 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8317 return self._parse_field() 8318 8319 def _parse_copy(self) -> exp.Copy | exp.Command: 8320 start = self._prev 8321 8322 self._match(TokenType.INTO) 8323 8324 this = ( 8325 self._parse_select(nested=True, parse_subquery_alias=False) 8326 if self._match(TokenType.L_PAREN, advance=False) 8327 else self._parse_table(schema=True) 8328 ) 8329 8330 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8331 8332 files = self._parse_csv(self._parse_file_location) 8333 credentials = self._parse_credentials() 8334 8335 self._match_text_seq("WITH") 8336 8337 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8338 8339 # Fallback case 8340 if self._curr: 8341 return self._parse_as_command(start) 8342 8343 return self.expression( 8344 exp.Copy, 8345 this=this, 8346 kind=kind, 8347 credentials=credentials, 8348 files=files, 8349 params=params, 8350 ) 8351 8352 def _parse_normalize(self) -> exp.Normalize: 8353 return self.expression( 8354 exp.Normalize, 8355 this=self._parse_bitwise(), 8356 form=self._match(TokenType.COMMA) and self._parse_var(), 8357 ) 8358 8359 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8360 args = self._parse_csv(lambda: self._parse_lambda()) 8361 8362 this = seq_get(args, 0) 8363 decimals = seq_get(args, 1) 8364 8365 return expr_type( 8366 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8367 ) 8368 8369 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8370 star_token = self._prev 8371 8372 if self._match_text_seq("COLUMNS", "(", advance=False): 8373 this = self._parse_function() 8374 if isinstance(this, exp.Columns): 8375 this.set("unpack", True) 8376 return this 8377 8378 return self.expression( 8379 exp.Star, 8380 **{ # type: ignore 8381 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8382 "replace": self._parse_star_op("REPLACE"), 8383 "rename": self._parse_star_op("RENAME"), 8384 }, 8385 ).update_positions(star_token) 8386 8387 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8388 privilege_parts = [] 8389 8390 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8391 # (end of privilege list) or L_PAREN (start of column list) are met 8392 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8393 privilege_parts.append(self._curr.text.upper()) 8394 self._advance() 8395 8396 this = exp.var(" ".join(privilege_parts)) 8397 expressions = ( 8398 self._parse_wrapped_csv(self._parse_column) 8399 if self._match(TokenType.L_PAREN, advance=False) 8400 else None 8401 ) 8402 8403 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8404 8405 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8406 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8407 principal = self._parse_id_var() 8408 8409 if not principal: 8410 return None 8411 8412 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8413 8414 def _parse_grant(self) -> exp.Grant | exp.Command: 8415 start = self._prev 8416 8417 privileges = self._parse_csv(self._parse_grant_privilege) 8418 8419 self._match(TokenType.ON) 8420 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8421 8422 # Attempt to parse the securable e.g. 
MySQL allows names 8423 # such as "foo.*", "*.*" which are not easily parseable yet 8424 securable = self._try_parse(self._parse_table_parts) 8425 8426 if not securable or not self._match_text_seq("TO"): 8427 return self._parse_as_command(start) 8428 8429 principals = self._parse_csv(self._parse_grant_principal) 8430 8431 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8432 8433 if self._curr: 8434 return self._parse_as_command(start) 8435 8436 return self.expression( 8437 exp.Grant, 8438 privileges=privileges, 8439 kind=kind, 8440 securable=securable, 8441 principals=principals, 8442 grant_option=grant_option, 8443 ) 8444 8445 def _parse_overlay(self) -> exp.Overlay: 8446 return self.expression( 8447 exp.Overlay, 8448 **{ # type: ignore 8449 "this": self._parse_bitwise(), 8450 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8451 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8452 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8453 }, 8454 ) 8455 8456 def _parse_format_name(self) -> exp.Property: 8457 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8458 # for FILE_FORMAT = <format_name> 8459 return self.expression( 8460 exp.Property, 8461 this=exp.var("FORMAT_NAME"), 8462 value=self._parse_string() or self._parse_table_parts(), 8463 ) 8464 8465 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8466 args: t.List[exp.Expression] = [] 8467 8468 if self._match(TokenType.DISTINCT): 8469 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8470 self._match(TokenType.COMMA) 8471 8472 args.extend(self._parse_csv(self._parse_assignment)) 8473 8474 return self.expression( 8475 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8476 ) 8477 8478 def _identifier_expression( 8479 self, token: t.Optional[Token] = None, **kwargs: t.Any 8480 ) -> exp.Identifier: 8481 token = token or self._prev 8482 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8483 expression.update_positions(token) 8484 return expression 8485 8486 def _build_pipe_cte( 8487 self, 8488 query: exp.Query, 8489 expressions: t.List[exp.Expression], 8490 alias_cte: t.Optional[exp.TableAlias] = None, 8491 ) -> exp.Select: 8492 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8493 if alias_cte: 8494 new_cte = alias_cte 8495 else: 8496 self._pipe_cte_counter += 1 8497 new_cte = f"__tmp{self._pipe_cte_counter}" 8498 8499 with_ = query.args.get("with") 8500 ctes = with_.pop() if with_ else None 8501 8502 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8503 if ctes: 8504 new_select.set("with", ctes) 8505 8506 return new_select.with_(new_cte, as_=query, copy=False) 8507 8508 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8509 select = self._parse_select(consume_pipe=False) 8510 if not select: 8511 return query 8512 8513 return self._build_pipe_cte( 8514 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8515 ) 8516 8517 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8518 limit = self._parse_limit() 8519 offset = self._parse_offset() 8520 if limit: 8521 curr_limit = query.args.get("limit", limit) 8522 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8523 query.limit(limit, copy=False) 8524 if offset: 8525 curr_offset = query.args.get("offset") 8526 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 
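# A usage sketch for the pipe-syntax LIMIT handling above (illustrative; assumes a
# sqlglot version whose BigQuery dialect supports |> pipe syntax): chained "|> LIMIT"
# operators keep the most restrictive limit, so the parsed query below ends up with LIMIT 3.
import sqlglot
from sqlglot import exp

query = sqlglot.parse_one("FROM t |> LIMIT 10 |> LIMIT 3", read="bigquery")
limit = query.find(exp.Limit)
assert limit is not None and limit.expression.to_py() == 3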
8527 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8528 8529 return query 8530 8531 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8532 this = self._parse_assignment() 8533 if self._match_text_seq("GROUP", "AND", advance=False): 8534 return this 8535 8536 this = self._parse_alias(this) 8537 8538 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8539 return self._parse_ordered(lambda: this) 8540 8541 return this 8542 8543 def _parse_pipe_syntax_aggregate_group_order_by( 8544 self, query: exp.Select, group_by_exists: bool = True 8545 ) -> exp.Select: 8546 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8547 aggregates_or_groups, orders = [], [] 8548 for element in expr: 8549 if isinstance(element, exp.Ordered): 8550 this = element.this 8551 if isinstance(this, exp.Alias): 8552 element.set("this", this.args["alias"]) 8553 orders.append(element) 8554 else: 8555 this = element 8556 aggregates_or_groups.append(this) 8557 8558 if group_by_exists: 8559 query.select(*aggregates_or_groups, copy=False).group_by( 8560 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8561 copy=False, 8562 ) 8563 else: 8564 query.select(*aggregates_or_groups, append=False, copy=False) 8565 8566 if orders: 8567 return query.order_by(*orders, append=False, copy=False) 8568 8569 return query 8570 8571 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8572 self._match_text_seq("AGGREGATE") 8573 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8574 8575 if self._match(TokenType.GROUP_BY) or ( 8576 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8577 ): 8578 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8579 8580 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8581 8582 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8583 first_setop = self.parse_set_operation(this=query) 8584 if not first_setop: 8585 return None 8586 8587 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8588 expr = self._parse_paren() 8589 return expr.assert_is(exp.Subquery).unnest() if expr else None 8590 8591 first_setop.this.pop() 8592 8593 setops = [ 8594 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8595 *self._parse_csv(_parse_and_unwrap_query), 8596 ] 8597 8598 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8599 with_ = query.args.get("with") 8600 ctes = with_.pop() if with_ else None 8601 8602 if isinstance(first_setop, exp.Union): 8603 query = query.union(*setops, copy=False, **first_setop.args) 8604 elif isinstance(first_setop, exp.Except): 8605 query = query.except_(*setops, copy=False, **first_setop.args) 8606 else: 8607 query = query.intersect(*setops, copy=False, **first_setop.args) 8608 8609 query.set("with", ctes) 8610 8611 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8612 8613 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8614 join = self._parse_join() 8615 if not join: 8616 return None 8617 8618 if isinstance(query, exp.Select): 8619 return query.join(join, copy=False) 8620 8621 return query 8622 8623 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8624 pivots = self._parse_pivots() 8625 if not pivots: 8626 return query 8627 8628 from_ = query.args.get("from") 8629 if from_: 8630 from_.this.set("pivots", pivots) 8631 8632 
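# Illustrative sketch for the pipe-syntax AGGREGATE handling above (assumes BigQuery
# pipe syntax support in the installed sqlglot version): "|> AGGREGATE ... GROUP BY ..."
# becomes projections plus a GROUP BY on the generated CTE-based query.
import sqlglot
from sqlglot import exp

query = sqlglot.parse_one("FROM t |> AGGREGATE COUNT(*) AS c GROUP BY x", read="bigquery")
assert query.find(exp.Count) is not None and query.find(exp.Group) is not None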
return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8633 8634 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8635 self._match_text_seq("EXTEND") 8636 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8637 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8638 8639 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8640 sample = self._parse_table_sample() 8641 8642 with_ = query.args.get("with") 8643 if with_: 8644 with_.expressions[-1].this.set("sample", sample) 8645 else: 8646 query.set("sample", sample) 8647 8648 return query 8649 8650 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8651 if isinstance(query, exp.Subquery): 8652 query = exp.select("*").from_(query, copy=False) 8653 8654 if not query.args.get("from"): 8655 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8656 8657 while self._match(TokenType.PIPE_GT): 8658 start = self._curr 8659 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8660 if not parser: 8661 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8662 # keywords, making it tricky to disambiguate them without lookahead. The approach 8663 # here is to try and parse a set operation and if that fails, then try to parse a 8664 # join operator. If that fails as well, then the operator is not supported. 8665 parsed_query = self._parse_pipe_syntax_set_operator(query) 8666 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8667 if not parsed_query: 8668 self._retreat(start) 8669 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8670 break 8671 query = parsed_query 8672 else: 8673 query = parser(self, query) 8674 8675 return query 8676 8677 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8678 vars = self._parse_csv(self._parse_id_var) 8679 if not vars: 8680 return None 8681 8682 return self.expression( 8683 exp.DeclareItem, 8684 this=vars, 8685 kind=self._parse_types(), 8686 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8687 ) 8688 8689 def _parse_declare(self) -> exp.Declare | exp.Command: 8690 start = self._prev 8691 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8692 8693 if not expressions or self._curr: 8694 return self._parse_as_command(start) 8695 8696 return self.expression(exp.Declare, expressions=expressions) 8697 8698 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8699 exp_class = exp.Cast if strict else exp.TryCast 8700 8701 if exp_class == exp.TryCast: 8702 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8703 8704 return self.expression(exp_class, **kwargs) 8705 8706 def _parse_json_value(self) -> exp.JSONValue: 8707 this = self._parse_bitwise() 8708 self._match(TokenType.COMMA) 8709 path = self._parse_bitwise() 8710 8711 returning = self._match(TokenType.RETURNING) and self._parse_type() 8712 8713 return self.expression( 8714 exp.JSONValue, 8715 this=this, 8716 path=self.dialect.to_json_path(path), 8717 returning=returning, 8718 on_condition=self._parse_on_condition(), 8719 ) 8720 8721 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8722 def concat_exprs( 8723 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8724 ) -> exp.Expression: 8725 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8726 concat_exprs = [ 8727 self.expression(exp.Concat, 
expressions=node.expressions, safe=True) 8728 ] 8729 node.set("expressions", concat_exprs) 8730 return node 8731 if len(exprs) == 1: 8732 return exprs[0] 8733 return self.expression(exp.Concat, expressions=args, safe=True) 8734 8735 args = self._parse_csv(self._parse_lambda) 8736 8737 if args: 8738 order = args[-1] if isinstance(args[-1], exp.Order) else None 8739 8740 if order: 8741 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8742 # remove 'expr' from exp.Order and add it back to args 8743 args[-1] = order.this 8744 order.set("this", concat_exprs(order.this, args)) 8745 8746 this = order or concat_exprs(args[0], args) 8747 else: 8748 this = None 8749 8750 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8751 8752 return self.expression(exp.GroupConcat, this=this, separator=separator)
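The pipe-syntax helpers above rewrite each |> stage into a plain SELECT, materializing intermediate stages as CTEs where a stage must build on the previous result. A minimal sketch of exercising them through the public API, assuming a dialect whose tokenizer maps |> to PIPE_GT (BigQuery is one such dialect); the exact shape of the generated CTEs is an implementation detail:

import sqlglot

# Each |> stage (WHERE, SELECT, AGGREGATE, ORDER BY, ...) is folded into the query;
# stages that need to build on the previous result introduce temporary CTEs.
ast = sqlglot.parse_one("FROM t |> WHERE x > 1 |> SELECT x", read="bigquery")
print(ast.sql(dialect="bigquery"))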
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
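Most callers reach the parser through sqlglot.parse or sqlglot.parse_one, but it can also be constructed directly. A minimal sketch of the options above (the dialect name is arbitrary):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Collect up to five error messages and raise them together at the end of parsing,
# instead of failing on the first problem as ErrorLevel.IMMEDIATE (the default) does.
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")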
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
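For illustration, a small sketch that pairs the tokenizer with parse(); the SQL string is arbitrary:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a, b FROM t; SELECT 1"
tokens = Tokenizer().tokenize(sql)

# One syntax tree per statement; passing the original SQL improves error messages.
for expression in Parser().parse(tokens, sql):
    print(expression.sql())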
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
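A sketch of the fallback behaviour, using expression types that are registered in EXPRESSION_PARSERS (exp.Table and exp.Condition); each candidate is tried in order until one parses:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "db.schema.tbl"
tokens = Tokenizer().tokenize(sql)

# exp.Table is tried first; exp.Condition would only be attempted if that parse failed.
tree = Parser().parse_into((exp.Table, exp.Condition), tokens, sql)[0]
print(type(tree))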
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
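For instance, with ErrorLevel.WARN the errors collected during parsing are logged here instead of being raised; a sketch using a statement that is missing its WHERE condition (how lenient the parser is about a given string can vary by dialect):

import logging
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

logging.basicConfig()

sql = "SELECT * FROM t WHERE"   # WHERE with no condition
parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize(sql), sql)   # problems are logged, nothing is raised
print(len(parser.errors))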
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
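Dialect parsers typically call this when they encounter an unsupported or malformed construct; a hypothetical sketch (_parse_my_clause is not a real method):

from sqlglot.parser import Parser

class MyParser(Parser):
    def _parse_my_clause(self):
        # Flags the problem at the current token; under ErrorLevel.IMMEDIATE this raises
        # immediately, otherwise the error is recorded and surfaced later by check_errors().
        self.raise_error("MY_CLAUSE is not supported")
        return None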
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
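Within parser code, expression() is preferred over instantiating nodes directly because it also attaches pending comments and validates mandatory arguments. A small sketch with a standalone parser instance:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()

# Equivalent to exp.Is(this=..., expression=...), plus comment handling and validation.
node = parser.expression(exp.Is, this=exp.column("a"), expression=exp.null())
print(node.sql())   # a IS NULL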
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
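Validation is driven by the expression class's declared argument types; with the default ErrorLevel.IMMEDIATE, a missing mandatory argument surfaces as a ParseError via raise_error(). A sketch (exp.Where declares its "this" argument as required):

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

parser = Parser()   # error_level defaults to ErrorLevel.IMMEDIATE

try:
    parser.validate_expression(exp.Where())
except ParseError as e:
    print(e.errors[0]["description"])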
def parse_set_operation(
    self, this: t.Optional[exp.Expression], consume_pipe: bool = False
) -> t.Optional[exp.Expression]:
    start = self._index
    _, side_token, kind_token = self._parse_join_parts()

    side = side_token.text if side_token else None
    kind = kind_token.text if kind_token else None

    if not self._match_set(self.SET_OPERATIONS):
        self._retreat(start)
        return None

    token_type = self._prev.token_type

    if token_type == TokenType.UNION:
        operation: t.Type[exp.SetOperation] = exp.Union
    elif token_type == TokenType.EXCEPT:
        operation = exp.Except
    else:
        operation = exp.Intersect

    comments = self._prev.comments

    if self._match(TokenType.DISTINCT):
        distinct: t.Optional[bool] = True
    elif self._match(TokenType.ALL):
        distinct = False
    else:
        distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
        if distinct is None:
            self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

    by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
        "STRICT", "CORRESPONDING"
    )
    if self._match_text_seq("CORRESPONDING"):
        by_name = True
        if not side and not kind:
            kind = "INNER"

    on_column_list = None
    if by_name and self._match_texts(("ON", "BY")):
        on_column_list = self._parse_wrapped_csv(self._parse_column)

    expression = self._parse_select(
        nested=True, parse_set_operation=False, consume_pipe=consume_pipe
    )

    return self.expression(
        operation,
        comments=comments,
        this=this,
        distinct=distinct,
        by_name=by_name,
        expression=expression,
        side=side,
        kind=kind,
        on=on_column_list,
    )